{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use Off-Policy VPG to Play CartPole-v0\n",
    "\n",
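    "TensorFlow version: the agent collects experience with a uniformly random behavior policy and trains a softmax policy network using importance-sampling weights."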
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import imp\n",
    "import importlib\n",
    "import itertools\n",
    "import logging\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow.compat.v2 as tf\n",
    "tf.random.set_seed(0)\n",
    "from tensorflow import keras\n",
    "from tensorflow import nn\n",
    "from tensorflow import optimizers\n",
    "from tensorflow.keras import layers\n",
    "from tensorflow.keras import losses\n",
    "\n",
    "# Reload the logging module so that basicConfig below takes effect even if\n",
    "# the root logger was already configured in this kernel.\n",
    "# importlib.reload is the supported replacement for the deprecated imp.reload.\n",
    "importlib.reload(logging)\n",
    "logging.basicConfig(\n",
    "    level=logging.DEBUG,\n",
    "    format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "    stream=sys.stdout,\n",
    "    datefmt='%H:%M:%S',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11:29:13 [INFO] env: <CartPoleEnv<CartPole-v0>>\n",
      "11:29:13 [INFO] action_space: Discrete(2)\n",
      "11:29:13 [INFO] observation_space: Box(4,)\n",
      "11:29:13 [INFO] reward_range: (-inf, inf)\n",
      "11:29:13 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n",
      "11:29:13 [INFO] _max_episode_steps: 200\n",
      "11:29:13 [INFO] _elapsed_steps: None\n"
     ]
    }
   ],
   "source": [
    "# Build the environment with a fixed seed, then log each of its attributes.\n",
    "env = gym.make('CartPole-v0')\n",
    "env.seed(0)\n",
    "for key, value in vars(env).items():\n",
    "    logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class OffPolicyVPGAgent:\n",
    "    \"\"\"Off-policy vanilla policy gradient agent for a discrete action space.\n",
    "\n",
    "    In 'train' mode the agent acts with a uniformly random behavior policy,\n",
    "    records what it sees, and on close() fits the softmax policy network using\n",
    "    importance-sampling weights (discounted return / behavior probability).\n",
    "    In any other mode it samples actions from the policy network.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env):\n",
    "        \"\"\"Read the number of actions from env and build the policy network.\"\"\"\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99\n",
    "        # Defined up front so step()/close() do not fail when reset() has not\n",
    "        # been called yet.\n",
    "        self.mode = None\n",
    "        self.trajectory = []\n",
    "\n",
    "        def dot(y_true, y_pred):\n",
    "            # y_true is the one-hot action, so this is minus the probability\n",
    "            # the network assigns to the taken action; Keras scales it by the\n",
    "            # per-sample weight passed to fit().\n",
    "            return -tf.reduce_sum(y_true * y_pred, axis=-1)\n",
    "\n",
    "        self.policy_net = self.build_net(hidden_sizes=[],\n",
    "                output_size=self.action_n,\n",
    "                output_activation=nn.softmax,\n",
    "                loss=dot, learning_rate=0.06)\n",
    "\n",
    "    def build_net(self, hidden_sizes, output_size,\n",
    "            activation=nn.relu, output_activation=None,\n",
    "            use_bias=False, loss=losses.mse, learning_rate=0.001):\n",
    "        \"\"\"Build and compile a dense Sequential model.\n",
    "\n",
    "        One Dense layer is added per entry of hidden_sizes, followed by an\n",
    "        output Dense layer of output_size units. The model is compiled with\n",
    "        an Adam optimizer at learning_rate and the given loss.\n",
    "        \"\"\"\n",
    "        model = keras.Sequential()\n",
    "        for hidden_size in hidden_sizes:\n",
    "            model.add(layers.Dense(units=hidden_size,\n",
    "                    activation=activation, use_bias=use_bias))\n",
    "        model.add(layers.Dense(units=output_size,\n",
    "                activation=output_activation, use_bias=use_bias))\n",
    "        optimizer = optimizers.Adam(learning_rate)\n",
    "        model.compile(optimizer=optimizer, loss=loss)\n",
    "        return model\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        \"\"\"Start an episode; in 'train' mode also clear the trajectory.\"\"\"\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        \"\"\"Choose an action for observation and return it.\n",
    "\n",
    "        In 'train' mode the action is drawn uniformly at random and the\n",
    "        (observation, reward, done, action) items are appended to the\n",
    "        trajectory. Otherwise the action is sampled from the policy network.\n",
    "        \"\"\"\n",
    "        if self.mode == 'train':\n",
    "            action = np.random.choice(self.action_n) # use random policy\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "        else:\n",
    "            # verbose=0 keeps Keras from printing a progress bar on every\n",
    "            # environment step (newer TensorFlow releases do so by default).\n",
    "            probs = self.policy_net.predict(observation[np.newaxis],\n",
    "                    verbose=0)[0]\n",
    "            action = np.random.choice(self.action_n, p=probs)\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        \"\"\"Finish an episode; in 'train' mode learn from the trajectory.\"\"\"\n",
    "        if self.mode == 'train':\n",
    "            self.learn()\n",
    "\n",
    "    def learn(self):\n",
    "        \"\"\"Fit the policy network once on the recorded trajectory.\"\"\"\n",
    "        if not self.trajectory:\n",
    "            return  # nothing recorded, e.g. close() without any step()\n",
    "        # step() appends four flat items per call; regroup them into rows.\n",
    "        df = pd.DataFrame(np.array(self.trajectory, dtype=object).reshape(-1, 4),\n",
    "                columns=['state', 'reward', 'done', 'action'])\n",
    "        df['discount'] = self.gamma ** df.index.to_series()\n",
    "        df['discounted_reward'] = df['discount'] * df['reward'].astype(float)\n",
    "        # Reverse cumulative sum: discounted rewards summed from each row to\n",
    "        # the last row (realigned by index on assignment).\n",
    "        df['discounted_return'] = df['discounted_reward'][::-1].cumsum()\n",
    "        states = np.stack(df['state'])\n",
    "        actions = np.eye(self.action_n)[df['action'].astype(int)]\n",
    "        # Importance sampling: the behavior policy is uniform over actions.\n",
    "        df['behavior_prob'] = 1. / self.action_n\n",
    "        df['sample_weight'] = df['discounted_return'] / df['behavior_prob']\n",
    "        sample_weight = df[['sample_weight',]].values\n",
    "        self.policy_net.fit(states, actions, sample_weight=sample_weight, verbose=0)\n",
    "\n",
    "\n",
    "agent = OffPolicyVPGAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11:29:14 [INFO] ==== train & verify ====\n",
      "11:29:15 [INFO] NumExpr defaulting to 4 threads.\n",
      "11:29:20 [DEBUG] verify episode 0: reward = 34.00, steps = 34\n",
      "11:29:21 [DEBUG] verify episode 1: reward = 16.00, steps = 16\n",
      "11:29:22 [DEBUG] verify episode 2: reward = 20.00, steps = 20\n",
      "11:29:23 [DEBUG] verify episode 3: reward = 17.00, steps = 17\n",
      "11:29:25 [DEBUG] verify episode 4: reward = 26.00, steps = 26\n",
      "11:29:25 [DEBUG] verify episode 5: reward = 12.00, steps = 12\n",
      "11:29:27 [DEBUG] verify episode 6: reward = 25.00, steps = 25\n",
      "11:29:28 [DEBUG] verify episode 7: reward = 19.00, steps = 19\n",
      "11:29:30 [DEBUG] verify episode 8: reward = 37.00, steps = 37\n",
      "11:29:31 [DEBUG] verify episode 9: reward = 27.00, steps = 27\n",
      "11:29:32 [DEBUG] verify episode 10: reward = 16.00, steps = 16\n",
      "11:29:33 [DEBUG] verify episode 11: reward = 23.00, steps = 23\n",
      "11:29:34 [DEBUG] verify episode 12: reward = 12.00, steps = 12\n",
      "11:29:35 [DEBUG] verify episode 13: reward = 24.00, steps = 24\n",
      "11:29:36 [DEBUG] verify episode 14: reward = 10.00, steps = 10\n",
      "11:29:37 [DEBUG] verify episode 15: reward = 25.00, steps = 25\n",
      "11:29:38 [DEBUG] verify episode 16: reward = 25.00, steps = 25\n",
      "11:29:40 [DEBUG] verify episode 17: reward = 34.00, steps = 34\n",
      "11:29:41 [DEBUG] verify episode 18: reward = 13.00, steps = 13\n",
      "11:29:42 [DEBUG] verify episode 19: reward = 17.00, steps = 17\n",
      "11:29:43 [DEBUG] verify episode 20: reward = 11.00, steps = 11\n",
      "11:29:44 [DEBUG] verify episode 21: reward = 19.00, steps = 19\n",
      "11:29:48 [DEBUG] verify episode 22: reward = 75.00, steps = 75\n",
      "11:29:49 [DEBUG] verify episode 23: reward = 20.00, steps = 20\n",
      "11:29:52 [DEBUG] verify episode 24: reward = 55.00, steps = 55\n",
      "11:29:53 [DEBUG] verify episode 25: reward = 19.00, steps = 19\n",
      "11:29:56 [DEBUG] verify episode 26: reward = 70.00, steps = 70\n",
      "11:29:56 [DEBUG] verify episode 27: reward = 11.00, steps = 11\n",
      "11:29:58 [DEBUG] verify episode 28: reward = 24.00, steps = 24\n",
      "11:30:02 [DEBUG] verify episode 29: reward = 81.00, steps = 81\n",
      "11:30:05 [DEBUG] verify episode 30: reward = 68.00, steps = 68\n",
      "11:30:10 [DEBUG] verify episode 31: reward = 111.00, steps = 111\n",
      "11:30:12 [DEBUG] verify episode 32: reward = 44.00, steps = 44\n",
      "11:30:14 [DEBUG] verify episode 33: reward = 42.00, steps = 42\n",
      "11:30:15 [DEBUG] verify episode 34: reward = 24.00, steps = 24\n",
      "11:30:18 [DEBUG] verify episode 35: reward = 65.00, steps = 65\n",
      "11:30:20 [DEBUG] verify episode 36: reward = 46.00, steps = 46\n",
      "11:30:23 [DEBUG] verify episode 37: reward = 59.00, steps = 59\n",
      "11:30:27 [DEBUG] verify episode 38: reward = 96.00, steps = 96\n",
      "11:30:30 [DEBUG] verify episode 39: reward = 61.00, steps = 61\n",
      "11:30:33 [DEBUG] verify episode 40: reward = 49.00, steps = 49\n",
      "11:30:37 [DEBUG] verify episode 41: reward = 105.00, steps = 105\n",
      "11:30:41 [DEBUG] verify episode 42: reward = 83.00, steps = 83\n",
      "11:30:46 [DEBUG] verify episode 43: reward = 102.00, steps = 102\n",
      "11:30:48 [DEBUG] verify episode 44: reward = 41.00, steps = 41\n",
      "11:30:51 [DEBUG] verify episode 45: reward = 58.00, steps = 58\n",
      "11:30:53 [DEBUG] verify episode 46: reward = 55.00, steps = 55\n",
      "11:30:56 [DEBUG] verify episode 47: reward = 51.00, steps = 51\n",
      "11:30:58 [DEBUG] verify episode 48: reward = 55.00, steps = 55\n",
      "11:31:03 [DEBUG] verify episode 49: reward = 102.00, steps = 102\n",
      "11:31:06 [DEBUG] verify episode 50: reward = 58.00, steps = 58\n",
      "11:31:09 [DEBUG] verify episode 51: reward = 81.00, steps = 81\n",
      "11:31:11 [DEBUG] verify episode 52: reward = 24.00, steps = 24\n",
      "11:31:13 [DEBUG] verify episode 53: reward = 50.00, steps = 50\n",
      "11:31:16 [DEBUG] verify episode 54: reward = 76.00, steps = 76\n",
      "11:31:19 [DEBUG] verify episode 55: reward = 62.00, steps = 62\n",
      "11:31:22 [DEBUG] verify episode 56: reward = 51.00, steps = 51\n",
      "11:31:26 [DEBUG] verify episode 57: reward = 88.00, steps = 88\n",
      "11:31:28 [DEBUG] verify episode 58: reward = 40.00, steps = 40\n",
      "11:31:31 [DEBUG] verify episode 59: reward = 71.00, steps = 71\n",
      "11:31:34 [DEBUG] verify episode 60: reward = 70.00, steps = 70\n",
      "11:31:36 [DEBUG] verify episode 61: reward = 39.00, steps = 39\n",
      "11:31:38 [DEBUG] verify episode 62: reward = 51.00, steps = 51\n",
      "11:31:41 [DEBUG] verify episode 63: reward = 56.00, steps = 56\n",
      "11:31:44 [DEBUG] verify episode 64: reward = 54.00, steps = 54\n",
      "11:31:47 [DEBUG] verify episode 65: reward = 70.00, steps = 70\n",
      "11:31:50 [DEBUG] verify episode 66: reward = 68.00, steps = 68\n",
      "11:31:53 [DEBUG] verify episode 67: reward = 61.00, steps = 61\n",
      "11:31:55 [DEBUG] verify episode 68: reward = 43.00, steps = 43\n",
      "11:31:57 [DEBUG] verify episode 69: reward = 51.00, steps = 51\n",
      "11:32:00 [DEBUG] verify episode 70: reward = 58.00, steps = 58\n",
      "11:32:03 [DEBUG] verify episode 71: reward = 64.00, steps = 64\n",
      "11:32:05 [DEBUG] verify episode 72: reward = 38.00, steps = 38\n",
      "11:32:07 [DEBUG] verify episode 73: reward = 34.00, steps = 34\n",
      "11:32:11 [DEBUG] verify episode 74: reward = 88.00, steps = 88\n",
      "11:32:16 [DEBUG] verify episode 75: reward = 125.00, steps = 125\n",
      "11:32:21 [DEBUG] verify episode 76: reward = 92.00, steps = 92\n",
      "11:32:25 [DEBUG] verify episode 77: reward = 103.00, steps = 103\n",
      "11:32:29 [DEBUG] verify episode 78: reward = 82.00, steps = 82\n",
      "11:32:31 [DEBUG] verify episode 79: reward = 37.00, steps = 37\n",
      "11:32:33 [DEBUG] verify episode 80: reward = 38.00, steps = 38\n",
      "11:32:36 [DEBUG] verify episode 81: reward = 61.00, steps = 61\n",
      "11:32:38 [DEBUG] verify episode 82: reward = 60.00, steps = 60\n",
      "11:32:41 [DEBUG] verify episode 83: reward = 63.00, steps = 63\n",
      "11:32:45 [DEBUG] verify episode 84: reward = 81.00, steps = 81\n",
      "11:32:48 [DEBUG] verify episode 85: reward = 54.00, steps = 54\n",
      "11:32:50 [DEBUG] verify episode 86: reward = 50.00, steps = 50\n",
      "11:32:53 [DEBUG] verify episode 87: reward = 72.00, steps = 72\n",
      "11:32:56 [DEBUG] verify episode 88: reward = 59.00, steps = 59\n",
      "11:33:01 [DEBUG] verify episode 89: reward = 96.00, steps = 96\n",
      "11:33:03 [DEBUG] verify episode 90: reward = 48.00, steps = 48\n",
      "11:33:07 [DEBUG] verify episode 91: reward = 83.00, steps = 83\n",
      "11:33:09 [DEBUG] verify episode 92: reward = 50.00, steps = 50\n",
      "11:33:12 [DEBUG] verify episode 93: reward = 54.00, steps = 54\n",
      "11:33:14 [DEBUG] verify episode 94: reward = 47.00, steps = 47\n",
      "11:33:16 [DEBUG] verify episode 95: reward = 45.00, steps = 45\n",
      "11:33:19 [DEBUG] verify episode 96: reward = 68.00, steps = 68\n",
      "11:33:22 [DEBUG] verify episode 97: reward = 66.00, steps = 66\n",
      "11:33:25 [DEBUG] verify episode 98: reward = 53.00, steps = 53\n",
      "11:33:28 [DEBUG] verify episode 99: reward = 58.00, steps = 58\n",
      "11:33:32 [DEBUG] verify episode 100: reward = 90.00, steps = 90\n",
      "11:33:35 [DEBUG] verify episode 101: reward = 74.00, steps = 74\n",
      "11:33:38 [DEBUG] verify episode 102: reward = 61.00, steps = 61\n",
      "11:33:41 [DEBUG] verify episode 103: reward = 50.00, steps = 50\n",
      "11:33:43 [DEBUG] verify episode 104: reward = 51.00, steps = 51\n",
      "11:33:46 [DEBUG] verify episode 105: reward = 58.00, steps = 58\n",
      "11:33:50 [DEBUG] verify episode 106: reward = 100.00, steps = 100\n",
      "11:33:53 [DEBUG] verify episode 107: reward = 49.00, steps = 49\n",
      "11:33:55 [DEBUG] verify episode 108: reward = 50.00, steps = 50\n",
      "11:33:57 [DEBUG] verify episode 109: reward = 32.00, steps = 32\n",
      "11:34:01 [DEBUG] verify episode 110: reward = 81.00, steps = 81\n",
      "11:34:04 [DEBUG] verify episode 111: reward = 75.00, steps = 75\n",
      "11:34:08 [DEBUG] verify episode 112: reward = 76.00, steps = 76\n",
      "11:34:11 [DEBUG] verify episode 113: reward = 71.00, steps = 71\n",
      "11:34:14 [DEBUG] verify episode 114: reward = 72.00, steps = 72\n",
      "11:34:17 [DEBUG] verify episode 115: reward = 63.00, steps = 63\n",
      "11:34:21 [DEBUG] verify episode 116: reward = 87.00, steps = 87\n",
      "11:34:26 [DEBUG] verify episode 117: reward = 104.00, steps = 104\n",
      "11:34:30 [DEBUG] verify episode 118: reward = 91.00, steps = 91\n",
      "11:34:33 [DEBUG] verify episode 119: reward = 53.00, steps = 53\n",
      "11:34:36 [DEBUG] verify episode 120: reward = 61.00, steps = 61\n",
      "11:34:39 [DEBUG] verify episode 121: reward = 61.00, steps = 61\n",
      "11:34:41 [DEBUG] verify episode 122: reward = 57.00, steps = 57\n",
      "11:34:46 [DEBUG] verify episode 123: reward = 97.00, steps = 97\n",
      "11:34:50 [DEBUG] verify episode 124: reward = 70.00, steps = 70\n",
      "11:34:52 [DEBUG] verify episode 125: reward = 41.00, steps = 41\n",
      "11:34:55 [DEBUG] verify episode 126: reward = 66.00, steps = 66\n",
      "11:34:58 [DEBUG] verify episode 127: reward = 74.00, steps = 74\n",
      "11:35:02 [DEBUG] verify episode 128: reward = 84.00, steps = 84\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11:35:05 [DEBUG] verify episode 129: reward = 59.00, steps = 59\n",
      "11:35:07 [DEBUG] verify episode 130: reward = 53.00, steps = 53\n",
      "11:35:14 [DEBUG] verify episode 131: reward = 145.00, steps = 145\n",
      "11:35:18 [DEBUG] verify episode 132: reward = 93.00, steps = 93\n",
      "11:35:22 [DEBUG] verify episode 133: reward = 85.00, steps = 85\n",
      "11:35:26 [DEBUG] verify episode 134: reward = 81.00, steps = 81\n",
      "11:35:28 [DEBUG] verify episode 135: reward = 40.00, steps = 40\n",
      "11:35:31 [DEBUG] verify episode 136: reward = 56.00, steps = 56\n",
      "11:35:33 [DEBUG] verify episode 137: reward = 59.00, steps = 59\n",
      "11:35:36 [DEBUG] verify episode 138: reward = 52.00, steps = 52\n",
      "11:35:38 [DEBUG] verify episode 139: reward = 39.00, steps = 39\n",
      "11:35:45 [DEBUG] verify episode 140: reward = 143.00, steps = 143\n",
      "11:35:50 [DEBUG] verify episode 141: reward = 104.00, steps = 104\n",
      "11:35:53 [DEBUG] verify episode 142: reward = 65.00, steps = 65\n",
      "11:35:55 [DEBUG] verify episode 143: reward = 50.00, steps = 50\n",
      "11:35:58 [DEBUG] verify episode 144: reward = 72.00, steps = 72\n",
      "11:36:02 [DEBUG] verify episode 145: reward = 67.00, steps = 67\n",
      "11:36:07 [DEBUG] verify episode 146: reward = 127.00, steps = 127\n",
      "11:36:16 [DEBUG] verify episode 147: reward = 188.00, steps = 188\n",
      "11:36:20 [DEBUG] verify episode 148: reward = 90.00, steps = 90\n",
      "11:36:23 [DEBUG] verify episode 149: reward = 66.00, steps = 66\n",
      "11:36:26 [DEBUG] verify episode 150: reward = 71.00, steps = 71\n",
      "11:36:30 [DEBUG] verify episode 151: reward = 71.00, steps = 71\n",
      "11:36:33 [DEBUG] verify episode 152: reward = 81.00, steps = 81\n",
      "11:36:37 [DEBUG] verify episode 153: reward = 72.00, steps = 72\n",
      "11:36:40 [DEBUG] verify episode 154: reward = 68.00, steps = 68\n",
      "11:36:46 [DEBUG] verify episode 155: reward = 126.00, steps = 126\n",
      "11:36:47 [DEBUG] verify episode 156: reward = 38.00, steps = 38\n",
      "11:36:50 [DEBUG] verify episode 157: reward = 60.00, steps = 60\n",
      "11:36:53 [DEBUG] verify episode 158: reward = 68.00, steps = 68\n",
      "11:36:58 [DEBUG] verify episode 159: reward = 89.00, steps = 89\n",
      "11:37:02 [DEBUG] verify episode 160: reward = 107.00, steps = 107\n",
      "11:37:08 [DEBUG] verify episode 161: reward = 122.00, steps = 122\n",
      "11:37:10 [DEBUG] verify episode 162: reward = 49.00, steps = 49\n",
      "11:37:14 [DEBUG] verify episode 163: reward = 71.00, steps = 71\n",
      "11:37:16 [DEBUG] verify episode 164: reward = 48.00, steps = 48\n",
      "11:37:21 [DEBUG] verify episode 165: reward = 125.00, steps = 125\n",
      "11:37:26 [DEBUG] verify episode 166: reward = 110.00, steps = 110\n",
      "11:37:30 [DEBUG] verify episode 167: reward = 85.00, steps = 85\n",
      "11:37:34 [DEBUG] verify episode 168: reward = 86.00, steps = 86\n",
      "11:37:38 [DEBUG] verify episode 169: reward = 75.00, steps = 75\n",
      "11:37:41 [DEBUG] verify episode 170: reward = 63.00, steps = 63\n",
      "11:37:44 [DEBUG] verify episode 171: reward = 74.00, steps = 74\n",
      "11:37:48 [DEBUG] verify episode 172: reward = 78.00, steps = 78\n",
      "11:37:51 [DEBUG] verify episode 173: reward = 64.00, steps = 64\n",
      "11:37:52 [DEBUG] verify episode 174: reward = 39.00, steps = 39\n",
      "11:37:56 [DEBUG] verify episode 175: reward = 69.00, steps = 69\n",
      "11:37:59 [DEBUG] verify episode 176: reward = 73.00, steps = 73\n",
      "11:38:03 [DEBUG] verify episode 177: reward = 80.00, steps = 80\n",
      "11:38:05 [DEBUG] verify episode 178: reward = 56.00, steps = 56\n",
      "11:38:07 [DEBUG] verify episode 179: reward = 43.00, steps = 43\n",
      "11:38:12 [DEBUG] verify episode 180: reward = 100.00, steps = 100\n",
      "11:38:15 [DEBUG] verify episode 181: reward = 67.00, steps = 67\n",
      "11:38:17 [DEBUG] verify episode 182: reward = 46.00, steps = 46\n",
      "11:38:21 [DEBUG] verify episode 183: reward = 77.00, steps = 77\n",
      "11:38:24 [DEBUG] verify episode 184: reward = 70.00, steps = 70\n",
      "11:38:27 [DEBUG] verify episode 185: reward = 58.00, steps = 58\n",
      "11:38:29 [DEBUG] verify episode 186: reward = 55.00, steps = 55\n",
      "11:38:34 [DEBUG] verify episode 187: reward = 96.00, steps = 96\n",
      "11:38:40 [DEBUG] verify episode 188: reward = 138.00, steps = 138\n",
      "11:38:42 [DEBUG] verify episode 189: reward = 52.00, steps = 52\n",
      "11:38:47 [DEBUG] verify episode 190: reward = 91.00, steps = 91\n",
      "11:38:50 [DEBUG] verify episode 191: reward = 70.00, steps = 70\n",
      "11:38:53 [DEBUG] verify episode 192: reward = 73.00, steps = 73\n",
      "11:38:57 [DEBUG] verify episode 193: reward = 81.00, steps = 81\n",
      "11:39:03 [DEBUG] verify episode 194: reward = 139.00, steps = 139\n",
      "11:39:06 [DEBUG] verify episode 195: reward = 63.00, steps = 63\n",
      "11:39:13 [DEBUG] verify episode 196: reward = 132.00, steps = 132\n",
      "11:39:17 [DEBUG] verify episode 197: reward = 82.00, steps = 82\n",
      "11:39:20 [DEBUG] verify episode 198: reward = 81.00, steps = 81\n",
      "11:39:24 [DEBUG] verify episode 199: reward = 78.00, steps = 78\n",
      "11:39:28 [DEBUG] verify episode 200: reward = 93.00, steps = 93\n",
      "11:39:31 [DEBUG] verify episode 201: reward = 70.00, steps = 70\n",
      "11:39:34 [DEBUG] verify episode 202: reward = 63.00, steps = 63\n",
      "11:39:39 [DEBUG] verify episode 203: reward = 95.00, steps = 95\n",
      "11:39:42 [DEBUG] verify episode 204: reward = 70.00, steps = 70\n",
      "11:39:47 [DEBUG] verify episode 205: reward = 104.00, steps = 104\n",
      "11:39:50 [DEBUG] verify episode 206: reward = 66.00, steps = 66\n",
      "11:39:57 [DEBUG] verify episode 207: reward = 160.00, steps = 160\n",
      "11:40:01 [DEBUG] verify episode 208: reward = 95.00, steps = 95\n",
      "11:40:04 [DEBUG] verify episode 209: reward = 54.00, steps = 54\n",
      "11:40:06 [DEBUG] verify episode 210: reward = 53.00, steps = 53\n",
      "11:40:09 [DEBUG] verify episode 211: reward = 58.00, steps = 58\n",
      "11:40:13 [DEBUG] verify episode 212: reward = 95.00, steps = 95\n",
      "11:40:18 [DEBUG] verify episode 213: reward = 89.00, steps = 89\n",
      "11:40:23 [DEBUG] verify episode 214: reward = 115.00, steps = 115\n",
      "11:40:28 [DEBUG] verify episode 215: reward = 112.00, steps = 112\n",
      "11:40:32 [DEBUG] verify episode 216: reward = 92.00, steps = 92\n",
      "11:40:37 [DEBUG] verify episode 217: reward = 104.00, steps = 104\n",
      "11:40:40 [DEBUG] verify episode 218: reward = 71.00, steps = 71\n",
      "11:40:45 [DEBUG] verify episode 219: reward = 104.00, steps = 104\n",
      "11:40:48 [DEBUG] verify episode 220: reward = 67.00, steps = 67\n",
      "11:40:51 [DEBUG] verify episode 221: reward = 68.00, steps = 68\n",
      "11:40:56 [DEBUG] verify episode 222: reward = 100.00, steps = 100\n",
      "11:40:59 [DEBUG] verify episode 223: reward = 63.00, steps = 63\n",
      "11:41:01 [DEBUG] verify episode 224: reward = 44.00, steps = 44\n",
      "11:41:03 [DEBUG] verify episode 225: reward = 51.00, steps = 51\n",
      "11:41:06 [DEBUG] verify episode 226: reward = 63.00, steps = 63\n",
      "11:41:10 [DEBUG] verify episode 227: reward = 83.00, steps = 83\n",
      "11:41:16 [DEBUG] verify episode 228: reward = 122.00, steps = 122\n",
      "11:41:18 [DEBUG] verify episode 229: reward = 60.00, steps = 60\n",
      "11:41:21 [DEBUG] verify episode 230: reward = 66.00, steps = 66\n",
      "11:41:25 [DEBUG] verify episode 231: reward = 82.00, steps = 82\n",
      "11:41:29 [DEBUG] verify episode 232: reward = 75.00, steps = 75\n",
      "11:41:32 [DEBUG] verify episode 233: reward = 67.00, steps = 67\n",
      "11:41:35 [DEBUG] verify episode 234: reward = 77.00, steps = 77\n",
      "11:41:39 [DEBUG] verify episode 235: reward = 74.00, steps = 74\n",
      "11:41:44 [DEBUG] verify episode 236: reward = 113.00, steps = 113\n",
      "11:41:47 [DEBUG] verify episode 237: reward = 66.00, steps = 66\n",
      "11:41:51 [DEBUG] verify episode 238: reward = 83.00, steps = 83\n",
      "11:41:54 [DEBUG] verify episode 239: reward = 66.00, steps = 66\n",
      "11:41:57 [DEBUG] verify episode 240: reward = 69.00, steps = 69\n",
      "11:42:02 [DEBUG] verify episode 241: reward = 101.00, steps = 101\n",
      "11:42:08 [DEBUG] verify episode 242: reward = 133.00, steps = 133\n",
      "11:42:11 [DEBUG] verify episode 243: reward = 72.00, steps = 72\n",
      "11:42:17 [DEBUG] verify episode 244: reward = 120.00, steps = 120\n",
      "11:42:21 [DEBUG] verify episode 245: reward = 104.00, steps = 104\n",
      "11:42:26 [DEBUG] verify episode 246: reward = 97.00, steps = 97\n",
      "11:42:29 [DEBUG] verify episode 247: reward = 73.00, steps = 73\n",
      "11:42:32 [DEBUG] verify episode 248: reward = 55.00, steps = 55\n",
      "11:42:34 [DEBUG] verify episode 249: reward = 62.00, steps = 62\n",
      "11:42:39 [DEBUG] verify episode 250: reward = 88.00, steps = 88\n",
      "11:42:42 [DEBUG] verify episode 251: reward = 73.00, steps = 73\n",
      "11:42:44 [DEBUG] verify episode 252: reward = 50.00, steps = 50\n",
      "11:42:48 [DEBUG] verify episode 253: reward = 79.00, steps = 79\n",
      "11:42:52 [DEBUG] verify episode 254: reward = 88.00, steps = 88\n",
      "11:42:57 [DEBUG] verify episode 255: reward = 100.00, steps = 100\n",
      "11:43:04 [DEBUG] verify episode 256: reward = 174.00, steps = 174\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11:43:10 [DEBUG] verify episode 257: reward = 121.00, steps = 121\n",
      "11:43:14 [DEBUG] verify episode 258: reward = 75.00, steps = 75\n",
      "11:43:16 [DEBUG] verify episode 259: reward = 42.00, steps = 42\n",
      "11:43:17 [DEBUG] verify episode 260: reward = 41.00, steps = 41\n",
      "11:43:23 [DEBUG] verify episode 261: reward = 123.00, steps = 123\n",
      "11:43:26 [DEBUG] verify episode 262: reward = 58.00, steps = 58\n",
      "11:43:28 [DEBUG] verify episode 263: reward = 58.00, steps = 58\n",
      "11:43:33 [DEBUG] verify episode 264: reward = 105.00, steps = 105\n",
      "11:43:36 [DEBUG] verify episode 265: reward = 61.00, steps = 61\n",
      "11:43:39 [DEBUG] verify episode 266: reward = 63.00, steps = 63\n",
      "11:43:42 [DEBUG] verify episode 267: reward = 74.00, steps = 74\n",
      "11:43:46 [DEBUG] verify episode 268: reward = 71.00, steps = 71\n",
      "11:43:48 [DEBUG] verify episode 269: reward = 56.00, steps = 56\n",
      "11:43:53 [DEBUG] verify episode 270: reward = 113.00, steps = 113\n",
      "11:43:56 [DEBUG] verify episode 271: reward = 60.00, steps = 60\n",
      "11:44:00 [DEBUG] verify episode 272: reward = 77.00, steps = 77\n",
      "11:44:03 [DEBUG] verify episode 273: reward = 67.00, steps = 67\n",
      "11:44:07 [DEBUG] verify episode 274: reward = 78.00, steps = 78\n",
      "11:44:10 [DEBUG] verify episode 275: reward = 68.00, steps = 68\n",
      "11:44:14 [DEBUG] verify episode 276: reward = 91.00, steps = 91\n",
      "11:44:18 [DEBUG] verify episode 277: reward = 84.00, steps = 84\n",
      "11:44:23 [DEBUG] verify episode 278: reward = 112.00, steps = 112\n",
      "11:44:26 [DEBUG] verify episode 279: reward = 74.00, steps = 74\n",
      "11:44:31 [DEBUG] verify episode 280: reward = 94.00, steps = 94\n",
      "11:44:34 [DEBUG] verify episode 281: reward = 75.00, steps = 75\n",
      "11:44:37 [DEBUG] verify episode 282: reward = 75.00, steps = 75\n",
      "11:44:40 [DEBUG] verify episode 283: reward = 56.00, steps = 56\n",
      "11:44:44 [DEBUG] verify episode 284: reward = 81.00, steps = 81\n",
      "11:44:48 [DEBUG] verify episode 285: reward = 79.00, steps = 79\n",
      "11:44:51 [DEBUG] verify episode 286: reward = 64.00, steps = 64\n",
      "11:44:53 [DEBUG] verify episode 287: reward = 46.00, steps = 46\n",
      "11:44:56 [DEBUG] verify episode 288: reward = 61.00, steps = 61\n",
      "11:44:58 [DEBUG] verify episode 289: reward = 52.00, steps = 52\n",
      "11:45:02 [DEBUG] verify episode 290: reward = 79.00, steps = 79\n",
      "11:45:06 [DEBUG] verify episode 291: reward = 97.00, steps = 97\n",
      "11:45:09 [DEBUG] verify episode 292: reward = 71.00, steps = 71\n",
      "11:45:13 [DEBUG] verify episode 293: reward = 81.00, steps = 81\n",
      "11:45:16 [DEBUG] verify episode 294: reward = 63.00, steps = 63\n",
      "11:45:20 [DEBUG] verify episode 295: reward = 80.00, steps = 80\n",
      "11:45:24 [DEBUG] verify episode 296: reward = 100.00, steps = 100\n",
      "11:45:28 [DEBUG] verify episode 297: reward = 80.00, steps = 80\n",
      "11:45:35 [DEBUG] verify episode 298: reward = 150.00, steps = 150\n",
      "11:45:42 [DEBUG] verify episode 299: reward = 163.00, steps = 163\n",
      "11:45:44 [DEBUG] verify episode 300: reward = 47.00, steps = 47\n",
      "11:45:48 [DEBUG] verify episode 301: reward = 86.00, steps = 86\n",
      "11:45:53 [DEBUG] verify episode 302: reward = 106.00, steps = 106\n",
      "11:45:56 [DEBUG] verify episode 303: reward = 68.00, steps = 68\n",
      "11:46:00 [DEBUG] verify episode 304: reward = 82.00, steps = 82\n",
      "11:46:03 [DEBUG] verify episode 305: reward = 66.00, steps = 66\n",
      "11:46:05 [DEBUG] verify episode 306: reward = 46.00, steps = 46\n",
      "11:46:08 [DEBUG] verify episode 307: reward = 69.00, steps = 69\n",
      "11:46:11 [DEBUG] verify episode 308: reward = 58.00, steps = 58\n",
      "11:46:14 [DEBUG] verify episode 309: reward = 60.00, steps = 60\n",
      "11:46:18 [DEBUG] verify episode 310: reward = 87.00, steps = 87\n",
      "11:46:22 [DEBUG] verify episode 311: reward = 88.00, steps = 88\n",
      "11:46:27 [DEBUG] verify episode 312: reward = 117.00, steps = 117\n",
      "11:46:30 [DEBUG] verify episode 313: reward = 52.00, steps = 52\n",
      "11:46:33 [DEBUG] verify episode 314: reward = 82.00, steps = 82\n",
      "11:46:38 [DEBUG] verify episode 315: reward = 95.00, steps = 95\n",
      "11:46:42 [DEBUG] verify episode 316: reward = 85.00, steps = 85\n",
      "11:46:46 [DEBUG] verify episode 317: reward = 86.00, steps = 86\n",
      "11:46:52 [DEBUG] verify episode 318: reward = 145.00, steps = 145\n",
      "11:46:55 [DEBUG] verify episode 319: reward = 62.00, steps = 62\n",
      "11:46:59 [DEBUG] verify episode 320: reward = 88.00, steps = 88\n",
      "11:47:02 [DEBUG] verify episode 321: reward = 53.00, steps = 53\n",
      "11:47:05 [DEBUG] verify episode 322: reward = 64.00, steps = 64\n",
      "11:47:10 [DEBUG] verify episode 323: reward = 112.00, steps = 112\n",
      "11:47:13 [DEBUG] verify episode 324: reward = 74.00, steps = 74\n",
      "11:47:16 [DEBUG] verify episode 325: reward = 58.00, steps = 58\n",
      "11:47:19 [DEBUG] verify episode 326: reward = 65.00, steps = 65\n",
      "11:47:22 [DEBUG] verify episode 327: reward = 71.00, steps = 71\n",
      "11:47:26 [DEBUG] verify episode 328: reward = 87.00, steps = 87\n",
      "11:47:29 [DEBUG] verify episode 329: reward = 61.00, steps = 61\n",
      "11:47:33 [DEBUG] verify episode 330: reward = 86.00, steps = 86\n",
      "11:47:36 [DEBUG] verify episode 331: reward = 73.00, steps = 73\n",
      "11:47:40 [DEBUG] verify episode 332: reward = 86.00, steps = 86\n",
      "11:47:43 [DEBUG] verify episode 333: reward = 54.00, steps = 54\n",
      "11:47:46 [DEBUG] verify episode 334: reward = 62.00, steps = 62\n",
      "11:47:49 [DEBUG] verify episode 335: reward = 70.00, steps = 70\n",
      "11:47:52 [DEBUG] verify episode 336: reward = 60.00, steps = 60\n",
      "11:47:56 [DEBUG] verify episode 337: reward = 82.00, steps = 82\n",
      "11:48:00 [DEBUG] verify episode 338: reward = 97.00, steps = 97\n",
      "11:48:03 [DEBUG] verify episode 339: reward = 70.00, steps = 70\n",
      "11:48:07 [DEBUG] verify episode 340: reward = 83.00, steps = 83\n",
      "11:48:11 [DEBUG] verify episode 341: reward = 91.00, steps = 91\n",
      "11:48:15 [DEBUG] verify episode 342: reward = 84.00, steps = 84\n",
      "11:48:18 [DEBUG] verify episode 343: reward = 72.00, steps = 72\n",
      "11:48:24 [DEBUG] verify episode 344: reward = 119.00, steps = 119\n",
      "11:48:27 [DEBUG] verify episode 345: reward = 59.00, steps = 59\n",
      "11:48:30 [DEBUG] verify episode 346: reward = 82.00, steps = 82\n",
      "11:48:35 [DEBUG] verify episode 347: reward = 94.00, steps = 94\n",
      "11:48:39 [DEBUG] verify episode 348: reward = 103.00, steps = 103\n",
      "11:48:46 [DEBUG] verify episode 349: reward = 149.00, steps = 149\n",
      "11:48:50 [DEBUG] verify episode 350: reward = 82.00, steps = 82\n",
      "11:48:54 [DEBUG] verify episode 351: reward = 77.00, steps = 77\n",
      "11:48:57 [DEBUG] verify episode 352: reward = 78.00, steps = 78\n",
      "11:49:01 [DEBUG] verify episode 353: reward = 75.00, steps = 75\n",
      "11:49:05 [DEBUG] verify episode 354: reward = 89.00, steps = 89\n",
      "11:49:09 [DEBUG] verify episode 355: reward = 81.00, steps = 81\n",
      "11:49:15 [DEBUG] verify episode 356: reward = 137.00, steps = 137\n",
      "11:49:19 [DEBUG] verify episode 357: reward = 105.00, steps = 105\n",
      "11:49:23 [DEBUG] verify episode 358: reward = 88.00, steps = 88\n",
      "11:49:28 [DEBUG] verify episode 359: reward = 102.00, steps = 102\n",
      "11:49:32 [DEBUG] verify episode 360: reward = 85.00, steps = 85\n",
      "11:49:37 [DEBUG] verify episode 361: reward = 106.00, steps = 106\n",
      "11:49:40 [DEBUG] verify episode 362: reward = 74.00, steps = 74\n",
      "11:49:44 [DEBUG] verify episode 363: reward = 79.00, steps = 79\n",
      "11:49:48 [DEBUG] verify episode 364: reward = 78.00, steps = 78\n",
      "11:49:52 [DEBUG] verify episode 365: reward = 93.00, steps = 93\n",
      "11:49:59 [DEBUG] verify episode 366: reward = 147.00, steps = 147\n",
      "11:50:05 [DEBUG] verify episode 367: reward = 149.00, steps = 149\n",
      "11:50:12 [DEBUG] verify episode 368: reward = 140.00, steps = 140\n",
      "11:50:15 [DEBUG] verify episode 369: reward = 71.00, steps = 71\n",
      "11:50:21 [DEBUG] verify episode 370: reward = 123.00, steps = 123\n",
      "11:50:25 [DEBUG] verify episode 371: reward = 99.00, steps = 99\n",
      "11:50:28 [DEBUG] verify episode 372: reward = 71.00, steps = 71\n",
      "11:50:33 [DEBUG] verify episode 373: reward = 110.00, steps = 110\n",
      "11:50:36 [DEBUG] verify episode 374: reward = 65.00, steps = 65\n",
      "11:50:39 [DEBUG] verify episode 375: reward = 61.00, steps = 61\n",
      "11:50:44 [DEBUG] verify episode 376: reward = 96.00, steps = 96\n",
      "11:50:48 [DEBUG] verify episode 377: reward = 100.00, steps = 100\n",
      "11:50:55 [DEBUG] verify episode 378: reward = 148.00, steps = 148\n",
      "11:51:03 [DEBUG] verify episode 379: reward = 166.00, steps = 166\n",
      "11:51:05 [DEBUG] verify episode 380: reward = 59.00, steps = 59\n",
      "11:51:09 [DEBUG] verify episode 381: reward = 84.00, steps = 84\n",
      "11:51:16 [DEBUG] verify episode 382: reward = 150.00, steps = 150\n",
      "11:51:21 [DEBUG] verify episode 383: reward = 100.00, steps = 100\n",
      "11:51:23 [DEBUG] verify episode 384: reward = 65.00, steps = 65\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11:51:28 [DEBUG] verify episode 385: reward = 92.00, steps = 92\n",
      "11:51:31 [DEBUG] verify episode 386: reward = 66.00, steps = 66\n",
      "11:51:35 [DEBUG] verify episode 387: reward = 101.00, steps = 101\n",
      "11:51:39 [DEBUG] verify episode 388: reward = 86.00, steps = 86\n",
      "11:51:44 [DEBUG] verify episode 389: reward = 92.00, steps = 92\n",
      "11:51:47 [DEBUG] verify episode 390: reward = 73.00, steps = 73\n",
      "11:51:50 [DEBUG] verify episode 391: reward = 60.00, steps = 60\n",
      "11:51:53 [DEBUG] verify episode 392: reward = 71.00, steps = 71\n",
      "11:51:58 [DEBUG] verify episode 393: reward = 109.00, steps = 109\n",
      "11:52:02 [DEBUG] verify episode 394: reward = 95.00, steps = 95\n",
      "11:52:06 [DEBUG] verify episode 395: reward = 74.00, steps = 74\n",
      "11:52:11 [DEBUG] verify episode 396: reward = 117.00, steps = 117\n",
      "11:52:16 [DEBUG] verify episode 397: reward = 110.00, steps = 110\n",
      "11:52:20 [DEBUG] verify episode 398: reward = 89.00, steps = 89\n",
      "11:52:25 [DEBUG] verify episode 399: reward = 103.00, steps = 103\n",
      "11:52:30 [DEBUG] verify episode 400: reward = 107.00, steps = 107\n",
      "11:52:35 [DEBUG] verify episode 401: reward = 111.00, steps = 111\n",
      "11:52:38 [DEBUG] verify episode 402: reward = 79.00, steps = 79\n",
      "11:52:42 [DEBUG] verify episode 403: reward = 75.00, steps = 75\n",
      "11:52:46 [DEBUG] verify episode 404: reward = 95.00, steps = 95\n",
      "11:52:52 [DEBUG] verify episode 405: reward = 133.00, steps = 133\n",
      "11:52:57 [DEBUG] verify episode 406: reward = 100.00, steps = 100\n",
      "11:53:02 [DEBUG] verify episode 407: reward = 101.00, steps = 101\n",
      "11:53:06 [DEBUG] verify episode 408: reward = 105.00, steps = 105\n",
      "11:53:10 [DEBUG] verify episode 409: reward = 88.00, steps = 88\n",
      "11:53:15 [DEBUG] verify episode 410: reward = 110.00, steps = 110\n",
      "11:53:19 [DEBUG] verify episode 411: reward = 81.00, steps = 81\n",
      "11:53:25 [DEBUG] verify episode 412: reward = 121.00, steps = 121\n",
      "11:53:28 [DEBUG] verify episode 413: reward = 76.00, steps = 76\n",
      "11:53:33 [DEBUG] verify episode 414: reward = 103.00, steps = 103\n",
      "11:53:36 [DEBUG] verify episode 415: reward = 66.00, steps = 66\n",
      "11:53:40 [DEBUG] verify episode 416: reward = 91.00, steps = 91\n",
      "11:53:44 [DEBUG] verify episode 417: reward = 83.00, steps = 83\n",
      "11:53:49 [DEBUG] verify episode 418: reward = 110.00, steps = 110\n",
      "11:53:58 [DEBUG] verify episode 419: reward = 200.00, steps = 200\n",
      "11:54:02 [DEBUG] verify episode 420: reward = 94.00, steps = 94\n",
      "11:54:06 [DEBUG] verify episode 421: reward = 76.00, steps = 76\n",
      "11:54:10 [DEBUG] verify episode 422: reward = 92.00, steps = 92\n",
      "11:54:14 [DEBUG] verify episode 423: reward = 82.00, steps = 82\n",
      "11:54:16 [DEBUG] verify episode 424: reward = 58.00, steps = 58\n",
      "11:54:21 [DEBUG] verify episode 425: reward = 99.00, steps = 99\n",
      "11:54:26 [DEBUG] verify episode 426: reward = 104.00, steps = 104\n",
      "11:54:30 [DEBUG] verify episode 427: reward = 74.00, steps = 74\n",
      "11:54:32 [DEBUG] verify episode 428: reward = 59.00, steps = 59\n",
      "11:54:35 [DEBUG] verify episode 429: reward = 58.00, steps = 58\n",
      "11:54:40 [DEBUG] verify episode 430: reward = 97.00, steps = 97\n",
      "11:54:45 [DEBUG] verify episode 431: reward = 109.00, steps = 109\n",
      "11:54:51 [DEBUG] verify episode 432: reward = 126.00, steps = 126\n",
      "11:54:55 [DEBUG] verify episode 433: reward = 88.00, steps = 88\n",
      "11:54:59 [DEBUG] verify episode 434: reward = 95.00, steps = 95\n",
      "11:55:04 [DEBUG] verify episode 435: reward = 116.00, steps = 116\n",
      "11:55:08 [DEBUG] verify episode 436: reward = 77.00, steps = 77\n",
      "11:55:16 [DEBUG] verify episode 437: reward = 172.00, steps = 172\n",
      "11:55:19 [DEBUG] verify episode 438: reward = 82.00, steps = 82\n",
      "11:55:23 [DEBUG] verify episode 439: reward = 86.00, steps = 86\n",
      "11:55:29 [DEBUG] verify episode 440: reward = 128.00, steps = 128\n",
      "11:55:36 [DEBUG] verify episode 441: reward = 142.00, steps = 142\n",
      "11:55:40 [DEBUG] verify episode 442: reward = 99.00, steps = 99\n",
      "11:55:44 [DEBUG] verify episode 443: reward = 84.00, steps = 84\n",
      "11:55:50 [DEBUG] verify episode 444: reward = 121.00, steps = 121\n",
      "11:55:54 [DEBUG] verify episode 445: reward = 95.00, steps = 95\n",
      "11:55:57 [DEBUG] verify episode 446: reward = 65.00, steps = 65\n",
      "11:56:01 [DEBUG] verify episode 447: reward = 75.00, steps = 75\n",
      "11:56:05 [DEBUG] verify episode 448: reward = 104.00, steps = 104\n",
      "11:56:10 [DEBUG] verify episode 449: reward = 100.00, steps = 100\n",
      "11:56:15 [DEBUG] verify episode 450: reward = 106.00, steps = 106\n",
      "11:56:18 [DEBUG] verify episode 451: reward = 67.00, steps = 67\n",
      "11:56:23 [DEBUG] verify episode 452: reward = 105.00, steps = 105\n",
      "11:56:26 [DEBUG] verify episode 453: reward = 63.00, steps = 63\n",
      "11:56:30 [DEBUG] verify episode 454: reward = 93.00, steps = 93\n",
      "11:56:34 [DEBUG] verify episode 455: reward = 91.00, steps = 91\n",
      "11:56:39 [DEBUG] verify episode 456: reward = 106.00, steps = 106\n",
      "11:56:44 [DEBUG] verify episode 457: reward = 114.00, steps = 114\n",
      "11:56:48 [DEBUG] verify episode 458: reward = 80.00, steps = 80\n",
      "11:56:53 [DEBUG] verify episode 459: reward = 116.00, steps = 116\n",
      "11:56:56 [DEBUG] verify episode 460: reward = 76.00, steps = 76\n",
      "11:57:01 [DEBUG] verify episode 461: reward = 97.00, steps = 97\n",
      "11:57:05 [DEBUG] verify episode 462: reward = 87.00, steps = 87\n",
      "11:57:12 [DEBUG] verify episode 463: reward = 144.00, steps = 144\n",
      "11:57:15 [DEBUG] verify episode 464: reward = 73.00, steps = 73\n",
      "11:57:20 [DEBUG] verify episode 465: reward = 118.00, steps = 118\n",
      "11:57:24 [DEBUG] verify episode 466: reward = 82.00, steps = 82\n",
      "11:57:28 [DEBUG] verify episode 467: reward = 88.00, steps = 88\n",
      "11:57:34 [DEBUG] verify episode 468: reward = 119.00, steps = 119\n",
      "11:57:39 [DEBUG] verify episode 469: reward = 116.00, steps = 116\n",
      "11:57:44 [DEBUG] verify episode 470: reward = 109.00, steps = 109\n",
      "11:57:48 [DEBUG] verify episode 471: reward = 84.00, steps = 84\n",
      "11:57:57 [DEBUG] verify episode 472: reward = 200.00, steps = 200\n",
      "11:58:02 [DEBUG] verify episode 473: reward = 107.00, steps = 107\n",
      "11:58:08 [DEBUG] verify episode 474: reward = 130.00, steps = 130\n",
      "11:58:11 [DEBUG] verify episode 475: reward = 57.00, steps = 57\n",
      "11:58:13 [DEBUG] verify episode 476: reward = 56.00, steps = 56\n",
      "11:58:18 [DEBUG] verify episode 477: reward = 100.00, steps = 100\n",
      "11:58:23 [DEBUG] verify episode 478: reward = 110.00, steps = 110\n",
      "11:58:27 [DEBUG] verify episode 479: reward = 92.00, steps = 92\n",
      "11:58:31 [DEBUG] verify episode 480: reward = 86.00, steps = 86\n",
      "11:58:36 [DEBUG] verify episode 481: reward = 121.00, steps = 121\n",
      "11:58:41 [DEBUG] verify episode 482: reward = 97.00, steps = 97\n",
      "11:58:45 [DEBUG] verify episode 483: reward = 86.00, steps = 86\n",
      "11:58:51 [DEBUG] verify episode 484: reward = 131.00, steps = 131\n",
      "11:58:54 [DEBUG] verify episode 485: reward = 65.00, steps = 65\n",
      "11:58:58 [DEBUG] verify episode 486: reward = 95.00, steps = 95\n",
      "11:59:03 [DEBUG] verify episode 487: reward = 96.00, steps = 96\n",
      "11:59:06 [DEBUG] verify episode 488: reward = 73.00, steps = 73\n",
      "11:59:09 [DEBUG] verify episode 489: reward = 66.00, steps = 66\n",
      "11:59:16 [DEBUG] verify episode 490: reward = 137.00, steps = 137\n",
      "11:59:20 [DEBUG] verify episode 491: reward = 86.00, steps = 86\n",
      "11:59:23 [DEBUG] verify episode 492: reward = 69.00, steps = 69\n",
      "11:59:28 [DEBUG] verify episode 493: reward = 119.00, steps = 119\n",
      "11:59:33 [DEBUG] verify episode 494: reward = 112.00, steps = 112\n",
      "11:59:37 [DEBUG] verify episode 495: reward = 89.00, steps = 89\n",
      "11:59:42 [DEBUG] verify episode 496: reward = 92.00, steps = 92\n",
      "11:59:45 [DEBUG] verify episode 497: reward = 80.00, steps = 80\n",
      "11:59:49 [DEBUG] verify episode 498: reward = 80.00, steps = 80\n",
      "11:59:52 [DEBUG] verify episode 499: reward = 74.00, steps = 74\n",
      "11:59:56 [DEBUG] verify episode 500: reward = 72.00, steps = 72\n",
      "12:00:00 [DEBUG] verify episode 501: reward = 99.00, steps = 99\n",
      "12:00:04 [DEBUG] verify episode 502: reward = 83.00, steps = 83\n",
      "12:00:11 [DEBUG] verify episode 503: reward = 155.00, steps = 155\n",
      "12:00:16 [DEBUG] verify episode 504: reward = 100.00, steps = 100\n",
      "12:00:21 [DEBUG] verify episode 505: reward = 107.00, steps = 107\n",
      "12:00:26 [DEBUG] verify episode 506: reward = 99.00, steps = 99\n",
      "12:00:29 [DEBUG] verify episode 507: reward = 66.00, steps = 66\n",
      "12:00:32 [DEBUG] verify episode 508: reward = 81.00, steps = 81\n",
      "12:00:39 [DEBUG] verify episode 509: reward = 131.00, steps = 131\n",
      "12:00:42 [DEBUG] verify episode 510: reward = 74.00, steps = 74\n",
      "12:00:47 [DEBUG] verify episode 511: reward = 111.00, steps = 111\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "12:00:52 [DEBUG] verify episode 512: reward = 90.00, steps = 90\n",
      "12:00:56 [DEBUG] verify episode 513: reward = 88.00, steps = 88\n",
      "12:01:00 [DEBUG] verify episode 514: reward = 92.00, steps = 92\n",
      "12:01:04 [DEBUG] verify episode 515: reward = 89.00, steps = 89\n",
      "12:01:07 [DEBUG] verify episode 516: reward = 59.00, steps = 59\n",
      "12:01:11 [DEBUG] verify episode 517: reward = 82.00, steps = 82\n",
      "12:01:15 [DEBUG] verify episode 518: reward = 77.00, steps = 77\n",
      "12:01:19 [DEBUG] verify episode 519: reward = 90.00, steps = 90\n",
      "12:01:24 [DEBUG] verify episode 520: reward = 116.00, steps = 116\n",
      "12:01:27 [DEBUG] verify episode 521: reward = 69.00, steps = 69\n",
      "12:01:33 [DEBUG] verify episode 522: reward = 124.00, steps = 124\n",
      "12:01:41 [DEBUG] verify episode 523: reward = 169.00, steps = 169\n",
      "12:01:46 [DEBUG] verify episode 524: reward = 104.00, steps = 104\n",
      "12:01:49 [DEBUG] verify episode 525: reward = 76.00, steps = 76\n",
      "12:01:53 [DEBUG] verify episode 526: reward = 75.00, steps = 75\n",
      "12:01:58 [DEBUG] verify episode 527: reward = 119.00, steps = 119\n",
      "12:02:03 [DEBUG] verify episode 528: reward = 96.00, steps = 96\n",
      "12:02:09 [DEBUG] verify episode 529: reward = 135.00, steps = 135\n",
      "12:02:13 [DEBUG] verify episode 530: reward = 86.00, steps = 86\n",
      "12:02:19 [DEBUG] verify episode 531: reward = 149.00, steps = 149\n",
      "12:02:25 [DEBUG] verify episode 532: reward = 131.00, steps = 131\n",
      "12:02:29 [DEBUG] verify episode 533: reward = 76.00, steps = 76\n",
      "12:02:38 [DEBUG] verify episode 534: reward = 200.00, steps = 200\n",
      "12:02:42 [DEBUG] verify episode 535: reward = 89.00, steps = 89\n",
      "12:02:47 [DEBUG] verify episode 536: reward = 102.00, steps = 102\n",
      "12:02:51 [DEBUG] verify episode 537: reward = 101.00, steps = 101\n",
      "12:02:55 [DEBUG] verify episode 538: reward = 79.00, steps = 79\n",
      "12:02:59 [DEBUG] verify episode 539: reward = 91.00, steps = 91\n",
      "12:03:03 [DEBUG] verify episode 540: reward = 79.00, steps = 79\n",
      "12:03:09 [DEBUG] verify episode 541: reward = 129.00, steps = 129\n",
      "12:03:13 [DEBUG] verify episode 542: reward = 101.00, steps = 101\n",
      "12:03:19 [DEBUG] verify episode 543: reward = 136.00, steps = 136\n",
      "12:03:24 [DEBUG] verify episode 544: reward = 97.00, steps = 97\n",
      "12:03:30 [DEBUG] verify episode 545: reward = 132.00, steps = 132\n",
      "12:03:35 [DEBUG] verify episode 546: reward = 118.00, steps = 118\n",
      "12:03:41 [DEBUG] verify episode 547: reward = 136.00, steps = 136\n",
      "12:03:47 [DEBUG] verify episode 548: reward = 121.00, steps = 121\n",
      "12:03:50 [DEBUG] verify episode 549: reward = 75.00, steps = 75\n",
      "12:03:56 [DEBUG] verify episode 550: reward = 130.00, steps = 130\n",
      "12:04:00 [DEBUG] verify episode 551: reward = 84.00, steps = 84\n",
      "12:04:05 [DEBUG] verify episode 552: reward = 105.00, steps = 105\n",
      "12:04:11 [DEBUG] verify episode 553: reward = 147.00, steps = 147\n",
      "12:04:18 [DEBUG] verify episode 554: reward = 141.00, steps = 141\n",
      "12:04:22 [DEBUG] verify episode 555: reward = 81.00, steps = 81\n",
      "12:04:26 [DEBUG] verify episode 556: reward = 95.00, steps = 95\n",
      "12:04:31 [DEBUG] verify episode 557: reward = 106.00, steps = 106\n",
      "12:04:38 [DEBUG] verify episode 558: reward = 150.00, steps = 150\n",
      "12:04:41 [DEBUG] verify episode 559: reward = 81.00, steps = 81\n",
      "12:04:46 [DEBUG] verify episode 560: reward = 100.00, steps = 100\n",
      "12:04:52 [DEBUG] verify episode 561: reward = 138.00, steps = 138\n",
      "12:04:59 [DEBUG] verify episode 562: reward = 136.00, steps = 136\n",
      "12:05:03 [DEBUG] verify episode 563: reward = 96.00, steps = 96\n",
      "12:05:06 [DEBUG] verify episode 564: reward = 73.00, steps = 73\n",
      "12:05:15 [DEBUG] verify episode 565: reward = 200.00, steps = 200\n",
      "12:05:19 [DEBUG] verify episode 566: reward = 78.00, steps = 78\n",
      "12:05:27 [DEBUG] verify episode 567: reward = 183.00, steps = 183\n",
      "12:05:34 [DEBUG] verify episode 568: reward = 158.00, steps = 158\n",
      "12:05:38 [DEBUG] verify episode 569: reward = 82.00, steps = 82\n",
      "12:05:45 [DEBUG] verify episode 570: reward = 136.00, steps = 136\n",
      "12:05:48 [DEBUG] verify episode 571: reward = 69.00, steps = 69\n",
      "12:05:53 [DEBUG] verify episode 572: reward = 101.00, steps = 101\n",
      "12:05:59 [DEBUG] verify episode 573: reward = 128.00, steps = 128\n",
      "12:06:04 [DEBUG] verify episode 574: reward = 114.00, steps = 114\n",
      "12:06:08 [DEBUG] verify episode 575: reward = 84.00, steps = 84\n",
      "12:06:10 [DEBUG] verify episode 576: reward = 60.00, steps = 60\n",
      "12:06:15 [DEBUG] verify episode 577: reward = 99.00, steps = 99\n",
      "12:06:20 [DEBUG] verify episode 578: reward = 117.00, steps = 117\n",
      "12:06:25 [DEBUG] verify episode 579: reward = 95.00, steps = 95\n",
      "12:06:31 [DEBUG] verify episode 580: reward = 143.00, steps = 143\n",
      "12:06:35 [DEBUG] verify episode 581: reward = 82.00, steps = 82\n",
      "12:06:42 [DEBUG] verify episode 582: reward = 163.00, steps = 163\n",
      "12:06:50 [DEBUG] verify episode 583: reward = 176.00, steps = 176\n",
      "12:06:55 [DEBUG] verify episode 584: reward = 98.00, steps = 98\n",
      "12:06:58 [DEBUG] verify episode 585: reward = 79.00, steps = 79\n",
      "12:07:06 [DEBUG] verify episode 586: reward = 166.00, steps = 166\n",
      "12:07:12 [DEBUG] verify episode 587: reward = 136.00, steps = 136\n",
      "12:07:16 [DEBUG] verify episode 588: reward = 83.00, steps = 83\n",
      "12:07:25 [DEBUG] verify episode 589: reward = 200.00, steps = 200\n",
      "12:07:30 [DEBUG] verify episode 590: reward = 101.00, steps = 101\n",
      "12:07:36 [DEBUG] verify episode 591: reward = 129.00, steps = 129\n",
      "12:07:42 [DEBUG] verify episode 592: reward = 134.00, steps = 134\n",
      "12:07:47 [DEBUG] verify episode 593: reward = 124.00, steps = 124\n",
      "12:07:53 [DEBUG] verify episode 594: reward = 122.00, steps = 122\n",
      "12:07:58 [DEBUG] verify episode 595: reward = 106.00, steps = 106\n",
      "12:08:02 [DEBUG] verify episode 596: reward = 103.00, steps = 103\n",
      "12:08:07 [DEBUG] verify episode 597: reward = 100.00, steps = 100\n",
      "12:08:12 [DEBUG] verify episode 598: reward = 104.00, steps = 104\n",
      "12:08:16 [DEBUG] verify episode 599: reward = 89.00, steps = 89\n",
      "12:08:21 [DEBUG] verify episode 600: reward = 105.00, steps = 105\n",
      "12:08:25 [DEBUG] verify episode 601: reward = 96.00, steps = 96\n",
      "12:08:31 [DEBUG] verify episode 602: reward = 123.00, steps = 123\n",
      "12:08:35 [DEBUG] verify episode 603: reward = 96.00, steps = 96\n",
      "12:08:39 [DEBUG] verify episode 604: reward = 96.00, steps = 96\n",
      "12:08:46 [DEBUG] verify episode 605: reward = 142.00, steps = 142\n",
      "12:08:52 [DEBUG] verify episode 606: reward = 138.00, steps = 138\n",
      "12:08:57 [DEBUG] verify episode 607: reward = 101.00, steps = 101\n",
      "12:09:05 [DEBUG] verify episode 608: reward = 184.00, steps = 184\n",
      "12:09:09 [DEBUG] verify episode 609: reward = 76.00, steps = 76\n",
      "12:09:13 [DEBUG] verify episode 610: reward = 93.00, steps = 93\n",
      "12:09:19 [DEBUG] verify episode 611: reward = 130.00, steps = 130\n",
      "12:09:25 [DEBUG] verify episode 612: reward = 129.00, steps = 129\n",
      "12:09:31 [DEBUG] verify episode 613: reward = 133.00, steps = 133\n",
      "12:09:35 [DEBUG] verify episode 614: reward = 98.00, steps = 98\n",
      "12:09:39 [DEBUG] verify episode 615: reward = 86.00, steps = 86\n",
      "12:09:46 [DEBUG] verify episode 616: reward = 146.00, steps = 146\n",
      "12:09:50 [DEBUG] verify episode 617: reward = 71.00, steps = 71\n",
      "12:09:54 [DEBUG] verify episode 618: reward = 82.00, steps = 82\n",
      "12:10:00 [DEBUG] verify episode 619: reward = 142.00, steps = 142\n",
      "12:10:04 [DEBUG] verify episode 620: reward = 87.00, steps = 87\n",
      "12:10:12 [DEBUG] verify episode 621: reward = 165.00, steps = 165\n",
      "12:10:15 [DEBUG] verify episode 622: reward = 86.00, steps = 86\n",
      "12:10:19 [DEBUG] verify episode 623: reward = 84.00, steps = 84\n",
      "12:10:25 [DEBUG] verify episode 624: reward = 117.00, steps = 117\n",
      "12:10:29 [DEBUG] verify episode 625: reward = 88.00, steps = 88\n",
      "12:10:32 [DEBUG] verify episode 626: reward = 79.00, steps = 79\n",
      "12:10:38 [DEBUG] verify episode 627: reward = 119.00, steps = 119\n",
      "12:10:42 [DEBUG] verify episode 628: reward = 88.00, steps = 88\n",
      "12:10:48 [DEBUG] verify episode 629: reward = 125.00, steps = 125\n",
      "12:10:51 [DEBUG] verify episode 630: reward = 70.00, steps = 70\n",
      "12:10:56 [DEBUG] verify episode 631: reward = 113.00, steps = 113\n",
      "12:11:05 [DEBUG] verify episode 632: reward = 188.00, steps = 188\n",
      "12:11:10 [DEBUG] verify episode 633: reward = 117.00, steps = 117\n",
      "12:11:14 [DEBUG] verify episode 634: reward = 94.00, steps = 94\n",
      "12:11:21 [DEBUG] verify episode 635: reward = 148.00, steps = 148\n",
      "12:11:26 [DEBUG] verify episode 636: reward = 112.00, steps = 112\n",
      "12:11:31 [DEBUG] verify episode 637: reward = 117.00, steps = 117\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "12:11:37 [DEBUG] verify episode 638: reward = 125.00, steps = 125\n",
      "12:11:41 [DEBUG] verify episode 639: reward = 87.00, steps = 87\n",
      "12:11:45 [DEBUG] verify episode 640: reward = 95.00, steps = 95\n",
      "12:11:49 [DEBUG] verify episode 641: reward = 81.00, steps = 81\n",
      "12:11:54 [DEBUG] verify episode 642: reward = 100.00, steps = 100\n",
      "12:12:00 [DEBUG] verify episode 643: reward = 143.00, steps = 143\n",
      "12:12:04 [DEBUG] verify episode 644: reward = 79.00, steps = 79\n",
      "12:12:11 [DEBUG] verify episode 645: reward = 153.00, steps = 153\n",
      "12:12:18 [DEBUG] verify episode 646: reward = 166.00, steps = 166\n",
      "12:12:22 [DEBUG] verify episode 647: reward = 92.00, steps = 92\n",
      "12:12:28 [DEBUG] verify episode 648: reward = 129.00, steps = 129\n",
      "12:12:33 [DEBUG] verify episode 649: reward = 112.00, steps = 112\n",
      "12:12:37 [DEBUG] verify episode 650: reward = 90.00, steps = 90\n",
      "12:12:41 [DEBUG] verify episode 651: reward = 75.00, steps = 75\n",
      "12:12:48 [DEBUG] verify episode 652: reward = 143.00, steps = 143\n",
      "12:12:53 [DEBUG] verify episode 653: reward = 108.00, steps = 108\n",
      "12:12:58 [DEBUG] verify episode 654: reward = 118.00, steps = 118\n",
      "12:13:03 [DEBUG] verify episode 655: reward = 101.00, steps = 101\n",
      "12:13:11 [DEBUG] verify episode 656: reward = 160.00, steps = 160\n",
      "12:13:17 [DEBUG] verify episode 657: reward = 147.00, steps = 147\n",
      "12:13:22 [DEBUG] verify episode 658: reward = 96.00, steps = 96\n",
      "12:13:26 [DEBUG] verify episode 659: reward = 93.00, steps = 93\n",
      "12:13:33 [DEBUG] verify episode 660: reward = 160.00, steps = 160\n",
      "12:13:38 [DEBUG] verify episode 661: reward = 95.00, steps = 95\n",
      "12:13:44 [DEBUG] verify episode 662: reward = 145.00, steps = 145\n",
      "12:13:53 [DEBUG] verify episode 663: reward = 200.00, steps = 200\n",
      "12:13:57 [DEBUG] verify episode 664: reward = 92.00, steps = 92\n",
      "12:14:02 [DEBUG] verify episode 665: reward = 95.00, steps = 95\n",
      "12:14:05 [DEBUG] verify episode 666: reward = 79.00, steps = 79\n",
      "12:14:10 [DEBUG] verify episode 667: reward = 106.00, steps = 106\n",
      "12:14:15 [DEBUG] verify episode 668: reward = 98.00, steps = 98\n",
      "12:14:22 [DEBUG] verify episode 669: reward = 163.00, steps = 163\n",
      "12:14:25 [DEBUG] verify episode 670: reward = 71.00, steps = 71\n",
      "12:14:31 [DEBUG] verify episode 671: reward = 134.00, steps = 134\n",
      "12:14:37 [DEBUG] verify episode 672: reward = 117.00, steps = 117\n",
      "12:14:43 [DEBUG] verify episode 673: reward = 138.00, steps = 138\n",
      "12:14:48 [DEBUG] verify episode 674: reward = 115.00, steps = 115\n",
      "12:14:55 [DEBUG] verify episode 675: reward = 141.00, steps = 141\n",
      "12:14:59 [DEBUG] verify episode 676: reward = 84.00, steps = 84\n",
      "12:15:04 [DEBUG] verify episode 677: reward = 126.00, steps = 126\n",
      "12:15:10 [DEBUG] verify episode 678: reward = 121.00, steps = 121\n",
      "12:15:15 [DEBUG] verify episode 679: reward = 105.00, steps = 105\n",
      "12:15:19 [DEBUG] verify episode 680: reward = 84.00, steps = 84\n",
      "12:15:24 [DEBUG] verify episode 681: reward = 112.00, steps = 112\n",
      "12:15:28 [DEBUG] verify episode 682: reward = 101.00, steps = 101\n",
      "12:15:32 [DEBUG] verify episode 683: reward = 78.00, steps = 78\n",
      "12:15:38 [DEBUG] verify episode 684: reward = 139.00, steps = 139\n",
      "12:15:44 [DEBUG] verify episode 685: reward = 135.00, steps = 135\n",
      "12:15:51 [DEBUG] verify episode 686: reward = 148.00, steps = 148\n",
      "12:15:58 [DEBUG] verify episode 687: reward = 152.00, steps = 152\n",
      "12:16:03 [DEBUG] verify episode 688: reward = 119.00, steps = 119\n",
      "12:16:08 [DEBUG] verify episode 689: reward = 108.00, steps = 108\n",
      "12:16:15 [DEBUG] verify episode 690: reward = 145.00, steps = 145\n",
      "12:16:20 [DEBUG] verify episode 691: reward = 113.00, steps = 113\n",
      "12:16:26 [DEBUG] verify episode 692: reward = 128.00, steps = 128\n",
      "12:16:31 [DEBUG] verify episode 693: reward = 104.00, steps = 104\n",
      "12:16:35 [DEBUG] verify episode 694: reward = 93.00, steps = 93\n",
      "12:16:38 [DEBUG] verify episode 695: reward = 78.00, steps = 78\n",
      "12:16:42 [DEBUG] verify episode 696: reward = 75.00, steps = 75\n",
      "12:16:46 [DEBUG] verify episode 697: reward = 93.00, steps = 93\n",
      "12:16:54 [DEBUG] verify episode 698: reward = 179.00, steps = 179\n",
      "12:16:59 [DEBUG] verify episode 699: reward = 115.00, steps = 115\n",
      "12:17:07 [DEBUG] verify episode 700: reward = 167.00, steps = 167\n",
      "12:17:16 [DEBUG] verify episode 701: reward = 192.00, steps = 192\n",
      "12:17:20 [DEBUG] verify episode 702: reward = 100.00, steps = 100\n",
      "12:17:29 [DEBUG] verify episode 703: reward = 200.00, steps = 200\n",
      "12:17:35 [DEBUG] verify episode 704: reward = 122.00, steps = 122\n",
      "12:17:44 [DEBUG] verify episode 705: reward = 200.00, steps = 200\n",
      "12:17:49 [DEBUG] verify episode 706: reward = 118.00, steps = 118\n",
      "12:17:54 [DEBUG] verify episode 707: reward = 117.00, steps = 117\n",
      "12:17:59 [DEBUG] verify episode 708: reward = 91.00, steps = 91\n",
      "12:18:03 [DEBUG] verify episode 709: reward = 89.00, steps = 89\n",
      "12:18:06 [DEBUG] verify episode 710: reward = 80.00, steps = 80\n",
      "12:18:15 [DEBUG] verify episode 711: reward = 186.00, steps = 186\n",
      "12:18:20 [DEBUG] verify episode 712: reward = 125.00, steps = 125\n",
      "12:18:30 [DEBUG] verify episode 713: reward = 200.00, steps = 200\n",
      "12:18:33 [DEBUG] verify episode 714: reward = 89.00, steps = 89\n",
      "12:18:41 [DEBUG] verify episode 715: reward = 170.00, steps = 170\n",
      "12:18:46 [DEBUG] verify episode 716: reward = 103.00, steps = 103\n",
      "12:18:55 [DEBUG] verify episode 717: reward = 193.00, steps = 193\n",
      "12:19:01 [DEBUG] verify episode 718: reward = 132.00, steps = 132\n",
      "12:19:07 [DEBUG] verify episode 719: reward = 138.00, steps = 138\n",
      "12:19:12 [DEBUG] verify episode 720: reward = 119.00, steps = 119\n",
      "12:19:20 [DEBUG] verify episode 721: reward = 173.00, steps = 173\n",
      "12:19:27 [DEBUG] verify episode 722: reward = 158.00, steps = 158\n",
      "12:19:33 [DEBUG] verify episode 723: reward = 122.00, steps = 122\n",
      "12:19:38 [DEBUG] verify episode 724: reward = 100.00, steps = 100\n",
      "12:19:44 [DEBUG] verify episode 725: reward = 133.00, steps = 133\n",
      "12:19:49 [DEBUG] verify episode 726: reward = 124.00, steps = 124\n",
      "12:19:56 [DEBUG] verify episode 727: reward = 148.00, steps = 148\n",
      "12:20:03 [DEBUG] verify episode 728: reward = 163.00, steps = 163\n",
      "12:20:08 [DEBUG] verify episode 729: reward = 104.00, steps = 104\n",
      "12:20:17 [DEBUG] verify episode 730: reward = 193.00, steps = 193\n",
      "12:20:22 [DEBUG] verify episode 731: reward = 106.00, steps = 106\n",
      "12:20:27 [DEBUG] verify episode 732: reward = 125.00, steps = 125\n",
      "12:20:34 [DEBUG] verify episode 733: reward = 154.00, steps = 154\n",
      "12:20:39 [DEBUG] verify episode 734: reward = 101.00, steps = 101\n",
      "12:20:47 [DEBUG] verify episode 735: reward = 166.00, steps = 166\n",
      "12:20:54 [DEBUG] verify episode 736: reward = 155.00, steps = 155\n",
      "12:20:58 [DEBUG] verify episode 737: reward = 105.00, steps = 105\n",
      "12:21:03 [DEBUG] verify episode 738: reward = 106.00, steps = 106\n",
      "12:21:08 [DEBUG] verify episode 739: reward = 113.00, steps = 113\n",
      "12:21:12 [DEBUG] verify episode 740: reward = 86.00, steps = 86\n",
      "12:21:19 [DEBUG] verify episode 741: reward = 140.00, steps = 140\n",
      "12:21:26 [DEBUG] verify episode 742: reward = 155.00, steps = 155\n",
      "12:21:33 [DEBUG] verify episode 743: reward = 172.00, steps = 172\n",
      "12:21:39 [DEBUG] verify episode 744: reward = 125.00, steps = 125\n",
      "12:21:47 [DEBUG] verify episode 745: reward = 180.00, steps = 180\n",
      "12:21:53 [DEBUG] verify episode 746: reward = 120.00, steps = 120\n",
      "12:22:00 [DEBUG] verify episode 747: reward = 163.00, steps = 163\n",
      "12:22:06 [DEBUG] verify episode 748: reward = 138.00, steps = 138\n",
      "12:22:11 [DEBUG] verify episode 749: reward = 93.00, steps = 93\n",
      "12:22:18 [DEBUG] verify episode 750: reward = 165.00, steps = 165\n",
      "12:22:24 [DEBUG] verify episode 751: reward = 135.00, steps = 135\n",
      "12:22:33 [DEBUG] verify episode 752: reward = 200.00, steps = 200\n",
      "12:22:41 [DEBUG] verify episode 753: reward = 159.00, steps = 159\n",
      "12:22:49 [DEBUG] verify episode 754: reward = 185.00, steps = 185\n",
      "12:22:58 [DEBUG] verify episode 755: reward = 200.00, steps = 200\n",
      "12:23:06 [DEBUG] verify episode 756: reward = 180.00, steps = 180\n",
      "12:23:12 [DEBUG] verify episode 757: reward = 125.00, steps = 125\n",
      "12:23:16 [DEBUG] verify episode 758: reward = 101.00, steps = 101\n",
      "12:23:21 [DEBUG] verify episode 759: reward = 105.00, steps = 105\n",
      "12:23:29 [DEBUG] verify episode 760: reward = 168.00, steps = 168\n",
      "12:23:33 [DEBUG] verify episode 761: reward = 102.00, steps = 102\n",
      "12:23:39 [DEBUG] verify episode 762: reward = 128.00, steps = 128\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "12:23:48 [DEBUG] verify episode 763: reward = 200.00, steps = 200\n",
      "12:23:55 [DEBUG] verify episode 764: reward = 144.00, steps = 144\n",
      "12:24:02 [DEBUG] verify episode 765: reward = 154.00, steps = 154\n",
      "12:24:11 [DEBUG] verify episode 766: reward = 195.00, steps = 195\n",
      "12:24:17 [DEBUG] verify episode 767: reward = 148.00, steps = 148\n",
      "12:24:24 [DEBUG] verify episode 768: reward = 149.00, steps = 149\n",
      "12:24:30 [DEBUG] verify episode 769: reward = 128.00, steps = 128\n",
      "12:24:36 [DEBUG] verify episode 770: reward = 126.00, steps = 126\n",
      "12:24:42 [DEBUG] verify episode 771: reward = 150.00, steps = 150\n",
      "12:24:48 [DEBUG] verify episode 772: reward = 127.00, steps = 127\n",
      "12:24:58 [DEBUG] verify episode 773: reward = 200.00, steps = 200\n",
      "12:25:05 [DEBUG] verify episode 774: reward = 162.00, steps = 162\n",
      "12:25:15 [DEBUG] verify episode 775: reward = 200.00, steps = 200\n",
      "12:25:16 [DEBUG] verify episode 776: reward = 23.00, steps = 23\n",
      "12:25:22 [DEBUG] verify episode 777: reward = 146.00, steps = 146\n",
      "12:25:30 [DEBUG] verify episode 778: reward = 177.00, steps = 177\n",
      "12:25:35 [DEBUG] verify episode 779: reward = 111.00, steps = 111\n",
      "12:25:43 [DEBUG] verify episode 780: reward = 170.00, steps = 170\n",
      "12:25:50 [DEBUG] verify episode 781: reward = 155.00, steps = 155\n",
      "12:25:56 [DEBUG] verify episode 782: reward = 128.00, steps = 128\n",
      "12:26:03 [DEBUG] verify episode 783: reward = 165.00, steps = 165\n",
      "12:26:09 [DEBUG] verify episode 784: reward = 129.00, steps = 129\n",
      "12:26:18 [DEBUG] verify episode 785: reward = 200.00, steps = 200\n",
      "12:26:26 [DEBUG] verify episode 786: reward = 182.00, steps = 182\n",
      "12:26:34 [DEBUG] verify episode 787: reward = 168.00, steps = 168\n",
      "12:26:39 [DEBUG] verify episode 788: reward = 118.00, steps = 118\n",
      "12:26:47 [DEBUG] verify episode 789: reward = 162.00, steps = 162\n",
      "12:26:50 [DEBUG] verify episode 790: reward = 63.00, steps = 63\n",
      "12:26:55 [DEBUG] verify episode 791: reward = 112.00, steps = 112\n",
      "12:26:57 [DEBUG] verify episode 792: reward = 39.00, steps = 39\n",
      "12:27:03 [DEBUG] verify episode 793: reward = 143.00, steps = 143\n",
      "12:27:09 [DEBUG] verify episode 794: reward = 131.00, steps = 131\n",
      "12:27:14 [DEBUG] verify episode 795: reward = 115.00, steps = 115\n",
      "12:27:20 [DEBUG] verify episode 796: reward = 134.00, steps = 134\n",
      "12:27:24 [DEBUG] verify episode 797: reward = 72.00, steps = 72\n",
      "12:27:30 [DEBUG] verify episode 798: reward = 145.00, steps = 145\n",
      "12:27:32 [DEBUG] verify episode 799: reward = 43.00, steps = 43\n",
      "12:27:40 [DEBUG] verify episode 800: reward = 177.00, steps = 177\n",
      "12:27:43 [DEBUG] verify episode 801: reward = 50.00, steps = 50\n",
      "12:27:51 [DEBUG] verify episode 802: reward = 196.00, steps = 196\n",
      "12:28:00 [DEBUG] verify episode 803: reward = 200.00, steps = 200\n",
      "12:28:10 [DEBUG] verify episode 804: reward = 200.00, steps = 200\n",
      "12:28:15 [DEBUG] verify episode 805: reward = 110.00, steps = 110\n",
      "12:28:22 [DEBUG] verify episode 806: reward = 167.00, steps = 167\n",
      "12:28:27 [DEBUG] verify episode 807: reward = 98.00, steps = 98\n",
      "12:28:32 [DEBUG] verify episode 808: reward = 116.00, steps = 116\n",
      "12:28:39 [DEBUG] verify episode 809: reward = 146.00, steps = 146\n",
      "12:28:47 [DEBUG] verify episode 810: reward = 194.00, steps = 194\n",
      "12:28:54 [DEBUG] verify episode 811: reward = 141.00, steps = 141\n",
      "12:29:00 [DEBUG] verify episode 812: reward = 147.00, steps = 147\n",
      "12:29:07 [DEBUG] verify episode 813: reward = 155.00, steps = 155\n",
      "12:29:12 [DEBUG] verify episode 814: reward = 107.00, steps = 107\n",
      "12:29:17 [DEBUG] verify episode 815: reward = 109.00, steps = 109\n",
      "12:29:25 [DEBUG] verify episode 816: reward = 160.00, steps = 160\n",
      "12:29:32 [DEBUG] verify episode 817: reward = 165.00, steps = 165\n",
      "12:29:41 [DEBUG] verify episode 818: reward = 194.00, steps = 194\n",
      "12:29:47 [DEBUG] verify episode 819: reward = 135.00, steps = 135\n",
      "12:29:55 [DEBUG] verify episode 820: reward = 183.00, steps = 183\n",
      "12:30:04 [DEBUG] verify episode 821: reward = 200.00, steps = 200\n",
      "12:30:10 [DEBUG] verify episode 822: reward = 120.00, steps = 120\n",
      "12:30:15 [DEBUG] verify episode 823: reward = 115.00, steps = 115\n",
      "12:30:23 [DEBUG] verify episode 824: reward = 178.00, steps = 178\n",
      "12:30:30 [DEBUG] verify episode 825: reward = 147.00, steps = 147\n",
      "12:30:36 [DEBUG] verify episode 826: reward = 133.00, steps = 133\n",
      "12:30:41 [DEBUG] verify episode 827: reward = 125.00, steps = 125\n",
      "12:30:46 [DEBUG] verify episode 828: reward = 106.00, steps = 106\n",
      "12:30:52 [DEBUG] verify episode 829: reward = 136.00, steps = 136\n",
      "12:30:59 [DEBUG] verify episode 830: reward = 137.00, steps = 137\n",
      "12:31:06 [DEBUG] verify episode 831: reward = 168.00, steps = 168\n",
      "12:31:12 [DEBUG] verify episode 832: reward = 123.00, steps = 123\n",
      "12:31:20 [DEBUG] verify episode 833: reward = 186.00, steps = 186\n",
      "12:31:26 [DEBUG] verify episode 834: reward = 130.00, steps = 130\n",
      "12:31:32 [DEBUG] verify episode 835: reward = 126.00, steps = 126\n",
      "12:31:41 [DEBUG] verify episode 836: reward = 200.00, steps = 200\n",
      "12:31:50 [DEBUG] verify episode 837: reward = 200.00, steps = 200\n",
      "12:31:57 [DEBUG] verify episode 838: reward = 151.00, steps = 151\n",
      "12:32:06 [DEBUG] verify episode 839: reward = 200.00, steps = 200\n",
      "12:32:12 [DEBUG] verify episode 840: reward = 141.00, steps = 141\n",
      "12:32:17 [DEBUG] verify episode 841: reward = 108.00, steps = 108\n",
      "12:32:22 [DEBUG] verify episode 842: reward = 105.00, steps = 105\n",
      "12:32:28 [DEBUG] verify episode 843: reward = 135.00, steps = 135\n",
      "12:32:33 [DEBUG] verify episode 844: reward = 109.00, steps = 109\n",
      "12:32:42 [DEBUG] verify episode 845: reward = 198.00, steps = 198\n",
      "12:32:47 [DEBUG] verify episode 846: reward = 105.00, steps = 105\n",
      "12:32:51 [DEBUG] verify episode 847: reward = 103.00, steps = 103\n",
      "12:32:59 [DEBUG] verify episode 848: reward = 163.00, steps = 163\n",
      "12:33:04 [DEBUG] verify episode 849: reward = 115.00, steps = 115\n",
      "12:33:11 [DEBUG] verify episode 850: reward = 154.00, steps = 154\n",
      "12:33:17 [DEBUG] verify episode 851: reward = 137.00, steps = 137\n",
      "12:33:22 [DEBUG] verify episode 852: reward = 99.00, steps = 99\n",
      "12:33:30 [DEBUG] verify episode 853: reward = 171.00, steps = 171\n",
      "12:33:38 [DEBUG] verify episode 854: reward = 184.00, steps = 184\n",
      "12:33:43 [DEBUG] verify episode 855: reward = 121.00, steps = 121\n",
      "12:33:49 [DEBUG] verify episode 856: reward = 132.00, steps = 132\n",
      "12:33:54 [DEBUG] verify episode 857: reward = 111.00, steps = 111\n",
      "12:34:01 [DEBUG] verify episode 858: reward = 154.00, steps = 154\n",
      "12:34:07 [DEBUG] verify episode 859: reward = 131.00, steps = 131\n",
      "12:34:13 [DEBUG] verify episode 860: reward = 124.00, steps = 124\n",
      "12:34:19 [DEBUG] verify episode 861: reward = 128.00, steps = 128\n",
      "12:34:28 [DEBUG] verify episode 862: reward = 200.00, steps = 200\n",
      "12:34:34 [DEBUG] verify episode 863: reward = 137.00, steps = 137\n",
      "12:34:42 [DEBUG] verify episode 864: reward = 183.00, steps = 183\n",
      "12:34:47 [DEBUG] verify episode 865: reward = 101.00, steps = 101\n",
      "12:34:52 [DEBUG] verify episode 866: reward = 115.00, steps = 115\n",
      "12:34:58 [DEBUG] verify episode 867: reward = 123.00, steps = 123\n",
      "12:35:03 [DEBUG] verify episode 868: reward = 110.00, steps = 110\n",
      "12:35:11 [DEBUG] verify episode 869: reward = 171.00, steps = 171\n",
      "12:35:16 [DEBUG] verify episode 870: reward = 123.00, steps = 123\n",
      "12:35:22 [DEBUG] verify episode 871: reward = 113.00, steps = 113\n",
      "12:35:29 [DEBUG] verify episode 872: reward = 169.00, steps = 169\n",
      "12:35:36 [DEBUG] verify episode 873: reward = 148.00, steps = 148\n",
      "12:35:42 [DEBUG] verify episode 874: reward = 132.00, steps = 132\n",
      "12:35:47 [DEBUG] verify episode 875: reward = 111.00, steps = 111\n",
      "12:35:54 [DEBUG] verify episode 876: reward = 162.00, steps = 162\n",
      "12:36:03 [DEBUG] verify episode 877: reward = 200.00, steps = 200\n",
      "12:36:11 [DEBUG] verify episode 878: reward = 163.00, steps = 163\n",
      "12:36:18 [DEBUG] verify episode 879: reward = 153.00, steps = 153\n",
      "12:36:25 [DEBUG] verify episode 880: reward = 162.00, steps = 162\n",
      "12:36:31 [DEBUG] verify episode 881: reward = 141.00, steps = 141\n",
      "12:36:37 [DEBUG] verify episode 882: reward = 114.00, steps = 114\n",
      "12:36:46 [DEBUG] verify episode 883: reward = 200.00, steps = 200\n",
      "12:36:54 [DEBUG] verify episode 884: reward = 174.00, steps = 174\n",
      "12:36:59 [DEBUG] verify episode 885: reward = 109.00, steps = 109\n",
      "12:37:05 [DEBUG] verify episode 886: reward = 131.00, steps = 131\n",
      "12:37:10 [DEBUG] verify episode 887: reward = 112.00, steps = 112\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "12:37:20 [DEBUG] verify episode 888: reward = 200.00, steps = 200\n",
      "12:37:26 [DEBUG] verify episode 889: reward = 152.00, steps = 152\n",
      "12:37:33 [DEBUG] verify episode 890: reward = 140.00, steps = 140\n",
      "12:37:39 [DEBUG] verify episode 891: reward = 131.00, steps = 131\n",
      "12:37:45 [DEBUG] verify episode 892: reward = 141.00, steps = 141\n",
      "12:37:51 [DEBUG] verify episode 893: reward = 126.00, steps = 126\n",
      "12:37:55 [DEBUG] verify episode 894: reward = 97.00, steps = 97\n",
      "12:38:02 [DEBUG] verify episode 895: reward = 152.00, steps = 152\n",
      "12:38:09 [DEBUG] verify episode 896: reward = 148.00, steps = 148\n",
      "12:38:14 [DEBUG] verify episode 897: reward = 109.00, steps = 109\n",
      "12:38:23 [DEBUG] verify episode 898: reward = 200.00, steps = 200\n",
      "12:38:28 [DEBUG] verify episode 899: reward = 101.00, steps = 101\n",
      "12:38:34 [DEBUG] verify episode 900: reward = 140.00, steps = 140\n",
      "12:38:40 [DEBUG] verify episode 901: reward = 134.00, steps = 134\n",
      "12:38:47 [DEBUG] verify episode 902: reward = 166.00, steps = 166\n",
      "12:38:56 [DEBUG] verify episode 903: reward = 200.00, steps = 200\n",
      "12:39:03 [DEBUG] verify episode 904: reward = 152.00, steps = 152\n",
      "12:39:10 [DEBUG] verify episode 905: reward = 149.00, steps = 149\n",
      "12:39:15 [DEBUG] verify episode 906: reward = 110.00, steps = 110\n",
      "12:39:20 [DEBUG] verify episode 907: reward = 111.00, steps = 111\n",
      "12:39:27 [DEBUG] verify episode 908: reward = 151.00, steps = 151\n",
      "12:39:34 [DEBUG] verify episode 909: reward = 149.00, steps = 149\n",
      "12:39:41 [DEBUG] verify episode 910: reward = 151.00, steps = 151\n",
      "12:39:46 [DEBUG] verify episode 911: reward = 110.00, steps = 110\n",
      "12:39:53 [DEBUG] verify episode 912: reward = 165.00, steps = 165\n",
      "12:40:01 [DEBUG] verify episode 913: reward = 165.00, steps = 165\n",
      "12:40:09 [DEBUG] verify episode 914: reward = 183.00, steps = 183\n",
      "12:40:16 [DEBUG] verify episode 915: reward = 147.00, steps = 147\n",
      "12:40:23 [DEBUG] verify episode 916: reward = 164.00, steps = 164\n",
      "12:40:30 [DEBUG] verify episode 917: reward = 144.00, steps = 144\n",
      "12:40:39 [DEBUG] verify episode 918: reward = 197.00, steps = 197\n",
      "12:40:45 [DEBUG] verify episode 919: reward = 118.00, steps = 118\n",
      "12:40:52 [DEBUG] verify episode 920: reward = 165.00, steps = 165\n",
      "12:40:59 [DEBUG] verify episode 921: reward = 140.00, steps = 140\n",
      "12:41:04 [DEBUG] verify episode 922: reward = 116.00, steps = 116\n",
      "12:41:09 [DEBUG] verify episode 923: reward = 117.00, steps = 117\n",
      "12:41:15 [DEBUG] verify episode 924: reward = 132.00, steps = 132\n",
      "12:41:20 [DEBUG] verify episode 925: reward = 94.00, steps = 94\n",
      "12:41:25 [DEBUG] verify episode 926: reward = 122.00, steps = 122\n",
      "12:41:32 [DEBUG] verify episode 927: reward = 150.00, steps = 150\n",
      "12:41:40 [DEBUG] verify episode 928: reward = 172.00, steps = 172\n",
      "12:41:46 [DEBUG] verify episode 929: reward = 126.00, steps = 126\n",
      "12:41:51 [DEBUG] verify episode 930: reward = 130.00, steps = 130\n",
      "12:41:58 [DEBUG] verify episode 931: reward = 140.00, steps = 140\n",
      "12:42:03 [DEBUG] verify episode 932: reward = 107.00, steps = 107\n",
      "12:42:12 [DEBUG] verify episode 933: reward = 200.00, steps = 200\n",
      "12:42:18 [DEBUG] verify episode 934: reward = 129.00, steps = 129\n",
      "12:42:27 [DEBUG] verify episode 935: reward = 200.00, steps = 200\n",
      "12:42:33 [DEBUG] verify episode 936: reward = 148.00, steps = 148\n",
      "12:42:41 [DEBUG] verify episode 937: reward = 164.00, steps = 164\n",
      "12:42:47 [DEBUG] verify episode 938: reward = 132.00, steps = 132\n",
      "12:42:52 [DEBUG] verify episode 939: reward = 108.00, steps = 108\n",
      "12:42:59 [DEBUG] verify episode 940: reward = 149.00, steps = 149\n",
      "12:43:04 [DEBUG] verify episode 941: reward = 121.00, steps = 121\n",
      "12:43:11 [DEBUG] verify episode 942: reward = 148.00, steps = 148\n",
      "12:43:17 [DEBUG] verify episode 943: reward = 138.00, steps = 138\n",
      "12:43:26 [DEBUG] verify episode 944: reward = 200.00, steps = 200\n",
      "12:43:31 [DEBUG] verify episode 945: reward = 112.00, steps = 112\n",
      "12:43:37 [DEBUG] verify episode 946: reward = 123.00, steps = 123\n",
      "12:43:46 [DEBUG] verify episode 947: reward = 200.00, steps = 200\n",
      "12:43:52 [DEBUG] verify episode 948: reward = 140.00, steps = 140\n",
      "12:44:01 [DEBUG] verify episode 949: reward = 185.00, steps = 185\n",
      "12:44:06 [DEBUG] verify episode 950: reward = 112.00, steps = 112\n",
      "12:44:11 [DEBUG] verify episode 951: reward = 127.00, steps = 127\n",
      "12:44:18 [DEBUG] verify episode 952: reward = 154.00, steps = 154\n",
      "12:44:23 [DEBUG] verify episode 953: reward = 100.00, steps = 100\n",
      "12:44:30 [DEBUG] verify episode 954: reward = 154.00, steps = 154\n",
      "12:44:38 [DEBUG] verify episode 955: reward = 178.00, steps = 178\n",
      "12:44:46 [DEBUG] verify episode 956: reward = 163.00, steps = 163\n",
      "12:44:51 [DEBUG] verify episode 957: reward = 104.00, steps = 104\n",
      "12:45:00 [DEBUG] verify episode 958: reward = 200.00, steps = 200\n",
      "12:45:09 [DEBUG] verify episode 959: reward = 196.00, steps = 196\n",
      "12:45:17 [DEBUG] verify episode 960: reward = 177.00, steps = 177\n",
      "12:45:22 [DEBUG] verify episode 961: reward = 119.00, steps = 119\n",
      "12:45:29 [DEBUG] verify episode 962: reward = 148.00, steps = 148\n",
      "12:45:35 [DEBUG] verify episode 963: reward = 122.00, steps = 122\n",
      "12:45:40 [DEBUG] verify episode 964: reward = 113.00, steps = 113\n",
      "12:45:48 [DEBUG] verify episode 965: reward = 175.00, steps = 175\n",
      "12:45:54 [DEBUG] verify episode 966: reward = 147.00, steps = 147\n",
      "12:46:01 [DEBUG] verify episode 967: reward = 147.00, steps = 147\n",
      "12:46:07 [DEBUG] verify episode 968: reward = 130.00, steps = 130\n",
      "12:46:14 [DEBUG] verify episode 969: reward = 162.00, steps = 162\n",
      "12:46:22 [DEBUG] verify episode 970: reward = 179.00, steps = 179\n",
      "12:46:28 [DEBUG] verify episode 971: reward = 130.00, steps = 130\n",
      "12:46:37 [DEBUG] verify episode 972: reward = 199.00, steps = 199\n",
      "12:46:46 [DEBUG] verify episode 973: reward = 200.00, steps = 200\n",
      "12:46:51 [DEBUG] verify episode 974: reward = 109.00, steps = 109\n",
      "12:47:00 [DEBUG] verify episode 975: reward = 193.00, steps = 193\n",
      "12:47:04 [DEBUG] verify episode 976: reward = 97.00, steps = 97\n",
      "12:47:12 [DEBUG] verify episode 977: reward = 165.00, steps = 165\n",
      "12:47:17 [DEBUG] verify episode 978: reward = 115.00, steps = 115\n",
      "12:47:24 [DEBUG] verify episode 979: reward = 159.00, steps = 159\n",
      "12:47:31 [DEBUG] verify episode 980: reward = 139.00, steps = 139\n",
      "12:47:38 [DEBUG] verify episode 981: reward = 173.00, steps = 173\n",
      "12:47:47 [DEBUG] verify episode 982: reward = 195.00, steps = 195\n",
      "12:47:53 [DEBUG] verify episode 983: reward = 129.00, steps = 129\n",
      "12:48:01 [DEBUG] verify episode 984: reward = 171.00, steps = 171\n",
      "12:48:08 [DEBUG] verify episode 985: reward = 157.00, steps = 157\n",
      "12:48:13 [DEBUG] verify episode 986: reward = 110.00, steps = 110\n",
      "12:48:18 [DEBUG] verify episode 987: reward = 113.00, steps = 113\n",
      "12:48:22 [DEBUG] verify episode 988: reward = 90.00, steps = 90\n",
      "12:48:28 [DEBUG] verify episode 989: reward = 125.00, steps = 125\n",
      "12:48:33 [DEBUG] verify episode 990: reward = 104.00, steps = 104\n",
      "12:48:38 [DEBUG] verify episode 991: reward = 119.00, steps = 119\n",
      "12:48:44 [DEBUG] verify episode 992: reward = 127.00, steps = 127\n",
      "12:48:51 [DEBUG] verify episode 993: reward = 157.00, steps = 157\n",
      "12:48:57 [DEBUG] verify episode 994: reward = 116.00, steps = 116\n",
      "12:49:01 [DEBUG] verify episode 995: reward = 104.00, steps = 104\n",
      "12:49:05 [DEBUG] verify episode 996: reward = 88.00, steps = 88\n",
      "12:49:13 [DEBUG] verify episode 997: reward = 179.00, steps = 179\n",
      "12:49:21 [DEBUG] verify episode 998: reward = 159.00, steps = 159\n",
      "12:49:26 [DEBUG] verify episode 999: reward = 125.00, steps = 125\n",
      "12:49:31 [DEBUG] verify episode 1000: reward = 98.00, steps = 98\n",
      "12:49:35 [DEBUG] verify episode 1001: reward = 96.00, steps = 96\n",
      "12:49:41 [DEBUG] verify episode 1002: reward = 123.00, steps = 123\n",
      "12:49:45 [DEBUG] verify episode 1003: reward = 93.00, steps = 93\n",
      "12:49:50 [DEBUG] verify episode 1004: reward = 111.00, steps = 111\n",
      "12:49:57 [DEBUG] verify episode 1005: reward = 151.00, steps = 151\n",
      "12:50:02 [DEBUG] verify episode 1006: reward = 111.00, steps = 111\n",
      "12:50:10 [DEBUG] verify episode 1007: reward = 170.00, steps = 170\n",
      "12:50:14 [DEBUG] verify episode 1008: reward = 95.00, steps = 95\n",
      "12:50:19 [DEBUG] verify episode 1009: reward = 105.00, steps = 105\n",
      "12:50:25 [DEBUG] verify episode 1010: reward = 130.00, steps = 130\n",
      "12:50:30 [DEBUG] verify episode 1011: reward = 101.00, steps = 101\n",
      "12:50:36 [DEBUG] verify episode 1012: reward = 130.00, steps = 130\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "12:50:39 [DEBUG] verify episode 1013: reward = 83.00, steps = 83\n",
      "12:50:45 [DEBUG] verify episode 1014: reward = 127.00, steps = 127\n",
      "12:50:53 [DEBUG] verify episode 1015: reward = 163.00, steps = 163\n",
      "12:51:00 [DEBUG] verify episode 1016: reward = 166.00, steps = 166\n",
      "12:51:06 [DEBUG] verify episode 1017: reward = 130.00, steps = 130\n",
      "12:51:10 [DEBUG] verify episode 1018: reward = 95.00, steps = 95\n",
      "12:51:16 [DEBUG] verify episode 1019: reward = 134.00, steps = 134\n",
      "12:51:23 [DEBUG] verify episode 1020: reward = 139.00, steps = 139\n",
      "12:51:27 [DEBUG] verify episode 1021: reward = 96.00, steps = 96\n",
      "12:51:34 [DEBUG] verify episode 1022: reward = 155.00, steps = 155\n",
      "12:51:39 [DEBUG] verify episode 1023: reward = 107.00, steps = 107\n",
      "12:51:44 [DEBUG] verify episode 1024: reward = 107.00, steps = 107\n",
      "12:51:49 [DEBUG] verify episode 1025: reward = 105.00, steps = 105\n",
      "12:51:56 [DEBUG] verify episode 1026: reward = 147.00, steps = 147\n",
      "12:52:01 [DEBUG] verify episode 1027: reward = 125.00, steps = 125\n",
      "12:52:06 [DEBUG] verify episode 1028: reward = 108.00, steps = 108\n",
      "12:52:11 [DEBUG] verify episode 1029: reward = 108.00, steps = 108\n",
      "12:52:17 [DEBUG] verify episode 1030: reward = 129.00, steps = 129\n",
      "12:52:22 [DEBUG] verify episode 1031: reward = 113.00, steps = 113\n",
      "12:52:26 [DEBUG] verify episode 1032: reward = 80.00, steps = 80\n",
      "12:52:31 [DEBUG] verify episode 1033: reward = 106.00, steps = 106\n",
      "12:52:35 [DEBUG] verify episode 1034: reward = 91.00, steps = 91\n",
      "12:52:40 [DEBUG] verify episode 1035: reward = 106.00, steps = 106\n",
      "12:52:45 [DEBUG] verify episode 1036: reward = 121.00, steps = 121\n",
      "12:52:49 [DEBUG] verify episode 1037: reward = 91.00, steps = 91\n",
      "12:52:55 [DEBUG] verify episode 1038: reward = 114.00, steps = 114\n",
      "12:52:59 [DEBUG] verify episode 1039: reward = 103.00, steps = 103\n",
      "12:53:05 [DEBUG] verify episode 1040: reward = 111.00, steps = 111\n",
      "12:53:11 [DEBUG] verify episode 1041: reward = 154.00, steps = 154\n",
      "12:53:16 [DEBUG] verify episode 1042: reward = 93.00, steps = 93\n",
      "12:53:22 [DEBUG] verify episode 1043: reward = 129.00, steps = 129\n",
      "12:53:31 [DEBUG] verify episode 1044: reward = 200.00, steps = 200\n",
      "12:53:37 [DEBUG] verify episode 1045: reward = 128.00, steps = 128\n",
      "12:53:42 [DEBUG] verify episode 1046: reward = 121.00, steps = 121\n",
      "12:53:51 [DEBUG] verify episode 1047: reward = 200.00, steps = 200\n",
      "12:53:57 [DEBUG] verify episode 1048: reward = 130.00, steps = 130\n",
      "12:54:01 [DEBUG] verify episode 1049: reward = 93.00, steps = 93\n",
      "12:54:06 [DEBUG] verify episode 1050: reward = 94.00, steps = 94\n",
      "12:54:11 [DEBUG] verify episode 1051: reward = 118.00, steps = 118\n",
      "12:54:19 [DEBUG] verify episode 1052: reward = 171.00, steps = 171\n",
      "12:54:23 [DEBUG] verify episode 1053: reward = 101.00, steps = 101\n",
      "12:54:28 [DEBUG] verify episode 1054: reward = 95.00, steps = 95\n",
      "12:54:36 [DEBUG] verify episode 1055: reward = 180.00, steps = 180\n",
      "12:54:40 [DEBUG] verify episode 1056: reward = 98.00, steps = 98\n",
      "12:54:45 [DEBUG] verify episode 1057: reward = 109.00, steps = 109\n",
      "12:54:51 [DEBUG] verify episode 1058: reward = 122.00, steps = 122\n",
      "12:54:56 [DEBUG] verify episode 1059: reward = 100.00, steps = 100\n",
      "12:55:02 [DEBUG] verify episode 1060: reward = 142.00, steps = 142\n",
      "12:55:06 [DEBUG] verify episode 1061: reward = 96.00, steps = 96\n",
      "12:55:12 [DEBUG] verify episode 1062: reward = 130.00, steps = 130\n",
      "12:55:18 [DEBUG] verify episode 1063: reward = 126.00, steps = 126\n",
      "12:55:28 [DEBUG] verify episode 1064: reward = 200.00, steps = 200\n",
      "12:55:34 [DEBUG] verify episode 1065: reward = 132.00, steps = 132\n",
      "12:55:38 [DEBUG] verify episode 1066: reward = 98.00, steps = 98\n",
      "12:55:43 [DEBUG] verify episode 1067: reward = 90.00, steps = 90\n",
      "12:55:48 [DEBUG] verify episode 1068: reward = 128.00, steps = 128\n",
      "12:55:57 [DEBUG] verify episode 1069: reward = 182.00, steps = 182\n",
      "12:56:01 [DEBUG] verify episode 1070: reward = 105.00, steps = 105\n",
      "12:56:07 [DEBUG] verify episode 1071: reward = 131.00, steps = 131\n",
      "12:56:12 [DEBUG] verify episode 1072: reward = 100.00, steps = 100\n",
      "12:56:20 [DEBUG] verify episode 1073: reward = 186.00, steps = 186\n",
      "12:56:29 [DEBUG] verify episode 1074: reward = 193.00, steps = 193\n",
      "12:56:34 [DEBUG] verify episode 1075: reward = 100.00, steps = 100\n",
      "12:56:43 [DEBUG] verify episode 1076: reward = 200.00, steps = 200\n",
      "12:56:49 [DEBUG] verify episode 1077: reward = 146.00, steps = 146\n",
      "12:56:58 [DEBUG] verify episode 1078: reward = 200.00, steps = 200\n",
      "12:57:04 [DEBUG] verify episode 1079: reward = 116.00, steps = 116\n",
      "12:57:10 [DEBUG] verify episode 1080: reward = 131.00, steps = 131\n",
      "12:57:16 [DEBUG] verify episode 1081: reward = 144.00, steps = 144\n",
      "12:57:22 [DEBUG] verify episode 1082: reward = 118.00, steps = 118\n",
      "12:57:28 [DEBUG] verify episode 1083: reward = 130.00, steps = 130\n",
      "12:57:37 [DEBUG] verify episode 1084: reward = 200.00, steps = 200\n",
      "12:57:43 [DEBUG] verify episode 1085: reward = 154.00, steps = 154\n",
      "12:57:50 [DEBUG] verify episode 1086: reward = 155.00, steps = 155\n",
      "12:57:59 [DEBUG] verify episode 1087: reward = 179.00, steps = 179\n",
      "12:58:04 [DEBUG] verify episode 1088: reward = 119.00, steps = 119\n",
      "12:58:08 [DEBUG] verify episode 1089: reward = 93.00, steps = 93\n",
      "12:58:13 [DEBUG] verify episode 1090: reward = 111.00, steps = 111\n",
      "12:58:18 [DEBUG] verify episode 1091: reward = 101.00, steps = 101\n",
      "12:58:23 [DEBUG] verify episode 1092: reward = 97.00, steps = 97\n",
      "12:58:27 [DEBUG] verify episode 1093: reward = 102.00, steps = 102\n",
      "12:58:33 [DEBUG] verify episode 1094: reward = 133.00, steps = 133\n",
      "12:58:37 [DEBUG] verify episode 1095: reward = 88.00, steps = 88\n",
      "12:58:43 [DEBUG] verify episode 1096: reward = 126.00, steps = 126\n",
      "12:58:50 [DEBUG] verify episode 1097: reward = 154.00, steps = 154\n",
      "12:58:56 [DEBUG] verify episode 1098: reward = 142.00, steps = 142\n",
      "12:59:02 [DEBUG] verify episode 1099: reward = 115.00, steps = 115\n",
      "12:59:06 [DEBUG] verify episode 1100: reward = 95.00, steps = 95\n",
      "12:59:12 [DEBUG] verify episode 1101: reward = 123.00, steps = 123\n",
      "12:59:17 [DEBUG] verify episode 1102: reward = 119.00, steps = 119\n",
      "12:59:22 [DEBUG] verify episode 1103: reward = 119.00, steps = 119\n",
      "12:59:29 [DEBUG] verify episode 1104: reward = 140.00, steps = 140\n",
      "12:59:34 [DEBUG] verify episode 1105: reward = 119.00, steps = 119\n",
      "12:59:39 [DEBUG] verify episode 1106: reward = 112.00, steps = 112\n",
      "12:59:47 [DEBUG] verify episode 1107: reward = 171.00, steps = 171\n",
      "12:59:53 [DEBUG] verify episode 1108: reward = 141.00, steps = 141\n",
      "12:59:58 [DEBUG] verify episode 1109: reward = 109.00, steps = 109\n",
      "13:00:04 [DEBUG] verify episode 1110: reward = 130.00, steps = 130\n",
      "13:00:08 [DEBUG] verify episode 1111: reward = 84.00, steps = 84\n",
      "13:00:14 [DEBUG] verify episode 1112: reward = 121.00, steps = 121\n",
      "13:00:20 [DEBUG] verify episode 1113: reward = 143.00, steps = 143\n",
      "13:00:25 [DEBUG] verify episode 1114: reward = 114.00, steps = 114\n",
      "13:00:30 [DEBUG] verify episode 1115: reward = 98.00, steps = 98\n",
      "13:00:39 [DEBUG] verify episode 1116: reward = 194.00, steps = 194\n",
      "13:00:43 [DEBUG] verify episode 1117: reward = 99.00, steps = 99\n",
      "13:00:49 [DEBUG] verify episode 1118: reward = 134.00, steps = 134\n",
      "13:00:55 [DEBUG] verify episode 1119: reward = 130.00, steps = 130\n",
      "13:01:00 [DEBUG] verify episode 1120: reward = 95.00, steps = 95\n",
      "13:01:04 [DEBUG] verify episode 1121: reward = 101.00, steps = 101\n",
      "13:01:11 [DEBUG] verify episode 1122: reward = 146.00, steps = 146\n",
      "13:01:19 [DEBUG] verify episode 1123: reward = 183.00, steps = 183\n",
      "13:01:27 [DEBUG] verify episode 1124: reward = 181.00, steps = 181\n",
      "13:01:33 [DEBUG] verify episode 1125: reward = 128.00, steps = 128\n",
      "13:01:39 [DEBUG] verify episode 1126: reward = 138.00, steps = 138\n",
      "13:01:45 [DEBUG] verify episode 1127: reward = 111.00, steps = 111\n",
      "13:01:53 [DEBUG] verify episode 1128: reward = 189.00, steps = 189\n",
      "13:01:59 [DEBUG] verify episode 1129: reward = 124.00, steps = 124\n",
      "13:02:03 [DEBUG] verify episode 1130: reward = 94.00, steps = 94\n",
      "13:02:09 [DEBUG] verify episode 1131: reward = 122.00, steps = 122\n",
      "13:02:18 [DEBUG] verify episode 1132: reward = 200.00, steps = 200\n",
      "13:02:22 [DEBUG] verify episode 1133: reward = 102.00, steps = 102\n",
      "13:02:31 [DEBUG] verify episode 1134: reward = 200.00, steps = 200\n",
      "13:02:38 [DEBUG] verify episode 1135: reward = 139.00, steps = 139\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "13:02:43 [DEBUG] verify episode 1136: reward = 122.00, steps = 122\n",
      "13:02:48 [DEBUG] verify episode 1137: reward = 106.00, steps = 106\n",
      "13:02:53 [DEBUG] verify episode 1138: reward = 101.00, steps = 101\n",
      "13:02:58 [DEBUG] verify episode 1139: reward = 110.00, steps = 110\n",
      "13:03:06 [DEBUG] verify episode 1140: reward = 168.00, steps = 168\n",
      "13:03:15 [DEBUG] verify episode 1141: reward = 200.00, steps = 200\n",
      "13:03:20 [DEBUG] verify episode 1142: reward = 123.00, steps = 123\n",
      "13:03:26 [DEBUG] verify episode 1143: reward = 138.00, steps = 138\n",
      "13:03:32 [DEBUG] verify episode 1144: reward = 116.00, steps = 116\n",
      "13:03:39 [DEBUG] verify episode 1145: reward = 162.00, steps = 162\n",
      "13:03:45 [DEBUG] verify episode 1146: reward = 137.00, steps = 137\n",
      "13:03:53 [DEBUG] verify episode 1147: reward = 173.00, steps = 173\n",
      "13:03:58 [DEBUG] verify episode 1148: reward = 116.00, steps = 116\n",
      "13:04:03 [DEBUG] verify episode 1149: reward = 106.00, steps = 106\n",
      "13:04:08 [DEBUG] verify episode 1150: reward = 110.00, steps = 110\n",
      "13:04:13 [DEBUG] verify episode 1151: reward = 110.00, steps = 110\n",
      "13:04:21 [DEBUG] verify episode 1152: reward = 158.00, steps = 158\n",
      "13:04:28 [DEBUG] verify episode 1153: reward = 164.00, steps = 164\n",
      "13:04:36 [DEBUG] verify episode 1154: reward = 175.00, steps = 175\n",
      "13:04:40 [DEBUG] verify episode 1155: reward = 88.00, steps = 88\n",
      "13:04:45 [DEBUG] verify episode 1156: reward = 111.00, steps = 111\n",
      "13:04:54 [DEBUG] verify episode 1157: reward = 182.00, steps = 182\n",
      "13:04:59 [DEBUG] verify episode 1158: reward = 120.00, steps = 120\n",
      "13:05:03 [DEBUG] verify episode 1159: reward = 89.00, steps = 89\n",
      "13:05:08 [DEBUG] verify episode 1160: reward = 118.00, steps = 118\n",
      "13:05:15 [DEBUG] verify episode 1161: reward = 155.00, steps = 155\n",
      "13:05:19 [DEBUG] verify episode 1162: reward = 82.00, steps = 82\n",
      "13:05:24 [DEBUG] verify episode 1163: reward = 111.00, steps = 111\n",
      "13:05:32 [DEBUG] verify episode 1164: reward = 173.00, steps = 173\n",
      "13:05:37 [DEBUG] verify episode 1165: reward = 101.00, steps = 101\n",
      "13:05:43 [DEBUG] verify episode 1166: reward = 128.00, steps = 128\n",
      "13:05:49 [DEBUG] verify episode 1167: reward = 133.00, steps = 133\n",
      "13:05:55 [DEBUG] verify episode 1168: reward = 136.00, steps = 136\n",
      "13:06:01 [DEBUG] verify episode 1169: reward = 137.00, steps = 137\n",
      "13:06:05 [DEBUG] verify episode 1170: reward = 94.00, steps = 94\n",
      "13:06:13 [DEBUG] verify episode 1171: reward = 163.00, steps = 163\n",
      "13:06:17 [DEBUG] verify episode 1172: reward = 102.00, steps = 102\n",
      "13:06:25 [DEBUG] verify episode 1173: reward = 173.00, steps = 173\n",
      "13:06:34 [DEBUG] verify episode 1174: reward = 200.00, steps = 200\n",
      "13:06:42 [DEBUG] verify episode 1175: reward = 180.00, steps = 180\n",
      "13:06:48 [DEBUG] verify episode 1176: reward = 125.00, steps = 125\n",
      "13:06:53 [DEBUG] verify episode 1177: reward = 104.00, steps = 104\n",
      "13:06:59 [DEBUG] verify episode 1178: reward = 123.00, steps = 123\n",
      "13:07:03 [DEBUG] verify episode 1179: reward = 102.00, steps = 102\n",
      "13:07:08 [DEBUG] verify episode 1180: reward = 105.00, steps = 105\n",
      "13:07:12 [DEBUG] verify episode 1181: reward = 89.00, steps = 89\n",
      "13:07:17 [DEBUG] verify episode 1182: reward = 101.00, steps = 101\n",
      "13:07:22 [DEBUG] verify episode 1183: reward = 106.00, steps = 106\n",
      "13:07:26 [DEBUG] verify episode 1184: reward = 98.00, steps = 98\n",
      "13:07:32 [DEBUG] verify episode 1185: reward = 119.00, steps = 119\n",
      "13:07:37 [DEBUG] verify episode 1186: reward = 106.00, steps = 106\n",
      "13:07:42 [DEBUG] verify episode 1187: reward = 114.00, steps = 114\n",
      "13:07:47 [DEBUG] verify episode 1188: reward = 95.00, steps = 95\n",
      "13:07:53 [DEBUG] verify episode 1189: reward = 127.00, steps = 127\n",
      "13:07:57 [DEBUG] verify episode 1190: reward = 101.00, steps = 101\n",
      "13:08:02 [DEBUG] verify episode 1191: reward = 106.00, steps = 106\n",
      "13:08:08 [DEBUG] verify episode 1192: reward = 120.00, steps = 120\n",
      "13:08:16 [DEBUG] verify episode 1193: reward = 182.00, steps = 182\n",
      "13:08:22 [DEBUG] verify episode 1194: reward = 125.00, steps = 125\n",
      "13:08:27 [DEBUG] verify episode 1195: reward = 127.00, steps = 127\n",
      "13:08:34 [DEBUG] verify episode 1196: reward = 125.00, steps = 125\n",
      "13:08:40 [DEBUG] verify episode 1197: reward = 128.00, steps = 128\n",
      "13:08:44 [DEBUG] verify episode 1198: reward = 93.00, steps = 93\n",
      "13:08:49 [DEBUG] verify episode 1199: reward = 116.00, steps = 116\n",
      "13:08:57 [DEBUG] verify episode 1200: reward = 155.00, steps = 155\n",
      "13:09:02 [DEBUG] verify episode 1201: reward = 114.00, steps = 114\n",
      "13:09:10 [DEBUG] verify episode 1202: reward = 153.00, steps = 153\n",
      "13:09:17 [DEBUG] verify episode 1203: reward = 135.00, steps = 135\n",
      "13:09:27 [DEBUG] verify episode 1204: reward = 200.00, steps = 200\n",
      "13:09:32 [DEBUG] verify episode 1205: reward = 93.00, steps = 93\n",
      "13:09:37 [DEBUG] verify episode 1206: reward = 96.00, steps = 96\n",
      "13:09:43 [DEBUG] verify episode 1207: reward = 128.00, steps = 128\n",
      "13:09:48 [DEBUG] verify episode 1208: reward = 97.00, steps = 97\n",
      "13:09:55 [DEBUG] verify episode 1209: reward = 133.00, steps = 133\n",
      "13:10:03 [DEBUG] verify episode 1210: reward = 166.00, steps = 166\n",
      "13:10:08 [DEBUG] verify episode 1211: reward = 92.00, steps = 92\n",
      "13:10:14 [DEBUG] verify episode 1212: reward = 113.00, steps = 113\n",
      "13:10:20 [DEBUG] verify episode 1213: reward = 107.00, steps = 107\n",
      "13:10:27 [DEBUG] verify episode 1214: reward = 126.00, steps = 126\n",
      "13:10:34 [DEBUG] verify episode 1215: reward = 136.00, steps = 136\n",
      "13:10:41 [DEBUG] verify episode 1216: reward = 127.00, steps = 127\n",
      "13:10:49 [DEBUG] verify episode 1217: reward = 162.00, steps = 162\n",
      "13:10:54 [DEBUG] verify episode 1218: reward = 93.00, steps = 93\n",
      "13:10:59 [DEBUG] verify episode 1219: reward = 99.00, steps = 99\n",
      "13:11:05 [DEBUG] verify episode 1220: reward = 97.00, steps = 97\n",
      "13:11:13 [DEBUG] verify episode 1221: reward = 124.00, steps = 124\n",
      "13:11:22 [DEBUG] verify episode 1222: reward = 153.00, steps = 153\n",
      "13:11:33 [DEBUG] verify episode 1223: reward = 200.00, steps = 200\n",
      "13:11:39 [DEBUG] verify episode 1224: reward = 109.00, steps = 109\n",
      "13:11:46 [DEBUG] verify episode 1225: reward = 129.00, steps = 129\n",
      "13:11:56 [DEBUG] verify episode 1226: reward = 133.00, steps = 133\n",
      "13:12:03 [DEBUG] verify episode 1227: reward = 116.00, steps = 116\n",
      "13:12:11 [DEBUG] verify episode 1228: reward = 136.00, steps = 136\n",
      "13:12:18 [DEBUG] verify episode 1229: reward = 122.00, steps = 122\n",
      "13:12:27 [DEBUG] verify episode 1230: reward = 166.00, steps = 166\n",
      "13:12:34 [DEBUG] verify episode 1231: reward = 128.00, steps = 128\n",
      "13:12:40 [DEBUG] verify episode 1232: reward = 103.00, steps = 103\n",
      "13:12:46 [DEBUG] verify episode 1233: reward = 103.00, steps = 103\n",
      "13:12:53 [DEBUG] verify episode 1234: reward = 124.00, steps = 124\n",
      "13:13:02 [DEBUG] verify episode 1235: reward = 147.00, steps = 147\n",
      "13:13:10 [DEBUG] verify episode 1236: reward = 141.00, steps = 141\n",
      "13:13:17 [DEBUG] verify episode 1237: reward = 128.00, steps = 128\n",
      "13:13:25 [DEBUG] verify episode 1238: reward = 131.00, steps = 131\n",
      "13:13:31 [DEBUG] verify episode 1239: reward = 107.00, steps = 107\n",
      "13:13:38 [DEBUG] verify episode 1240: reward = 125.00, steps = 125\n",
      "13:13:47 [DEBUG] verify episode 1241: reward = 160.00, steps = 160\n",
      "13:13:54 [DEBUG] verify episode 1242: reward = 119.00, steps = 119\n",
      "13:14:00 [DEBUG] verify episode 1243: reward = 107.00, steps = 107\n",
      "13:14:06 [DEBUG] verify episode 1244: reward = 97.00, steps = 97\n",
      "13:14:12 [DEBUG] verify episode 1245: reward = 112.00, steps = 112\n",
      "13:14:18 [DEBUG] verify episode 1246: reward = 107.00, steps = 107\n",
      "13:14:27 [DEBUG] verify episode 1247: reward = 151.00, steps = 151\n",
      "13:14:34 [DEBUG] verify episode 1248: reward = 124.00, steps = 124\n",
      "13:14:44 [DEBUG] verify episode 1249: reward = 172.00, steps = 172\n",
      "13:14:51 [DEBUG] verify episode 1250: reward = 131.00, steps = 131\n",
      "13:14:58 [DEBUG] verify episode 1251: reward = 115.00, steps = 115\n",
      "13:15:05 [DEBUG] verify episode 1252: reward = 127.00, steps = 127\n",
      "13:15:11 [DEBUG] verify episode 1253: reward = 119.00, steps = 119\n",
      "13:15:17 [DEBUG] verify episode 1254: reward = 86.00, steps = 86\n",
      "13:15:23 [DEBUG] verify episode 1255: reward = 120.00, steps = 120\n",
      "13:15:30 [DEBUG] verify episode 1256: reward = 119.00, steps = 119\n",
      "13:15:39 [DEBUG] verify episode 1257: reward = 146.00, steps = 146\n",
      "13:15:45 [DEBUG] verify episode 1258: reward = 104.00, steps = 104\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "13:15:53 [DEBUG] verify episode 1259: reward = 146.00, steps = 146\n",
      "13:16:01 [DEBUG] verify episode 1260: reward = 139.00, steps = 139\n",
      "13:16:10 [DEBUG] verify episode 1261: reward = 167.00, steps = 167\n",
      "13:16:17 [DEBUG] verify episode 1262: reward = 123.00, steps = 123\n",
      "13:16:24 [DEBUG] verify episode 1263: reward = 127.00, steps = 127\n",
      "13:16:35 [DEBUG] verify episode 1264: reward = 200.00, steps = 200\n",
      "13:16:47 [DEBUG] verify episode 1265: reward = 197.00, steps = 197\n",
      "13:16:53 [DEBUG] verify episode 1266: reward = 111.00, steps = 111\n",
      "13:17:00 [DEBUG] verify episode 1267: reward = 122.00, steps = 122\n",
      "13:17:06 [DEBUG] verify episode 1268: reward = 109.00, steps = 109\n",
      "13:17:17 [DEBUG] verify episode 1269: reward = 183.00, steps = 183\n",
      "13:17:23 [DEBUG] verify episode 1270: reward = 111.00, steps = 111\n",
      "13:17:30 [DEBUG] verify episode 1271: reward = 135.00, steps = 135\n",
      "13:17:36 [DEBUG] verify episode 1272: reward = 102.00, steps = 102\n",
      "13:17:41 [DEBUG] verify episode 1273: reward = 91.00, steps = 91\n",
      "13:17:47 [DEBUG] verify episode 1274: reward = 108.00, steps = 108\n",
      "13:17:52 [DEBUG] verify episode 1275: reward = 80.00, steps = 80\n",
      "13:17:59 [DEBUG] verify episode 1276: reward = 127.00, steps = 127\n",
      "13:18:07 [DEBUG] verify episode 1277: reward = 132.00, steps = 132\n",
      "13:18:16 [DEBUG] verify episode 1278: reward = 156.00, steps = 156\n",
      "13:18:22 [DEBUG] verify episode 1279: reward = 110.00, steps = 110\n",
      "13:18:28 [DEBUG] verify episode 1280: reward = 107.00, steps = 107\n",
      "13:18:34 [DEBUG] verify episode 1281: reward = 104.00, steps = 104\n",
      "13:18:39 [DEBUG] verify episode 1282: reward = 94.00, steps = 94\n",
      "13:18:46 [DEBUG] verify episode 1283: reward = 123.00, steps = 123\n",
      "13:18:53 [DEBUG] verify episode 1284: reward = 131.00, steps = 131\n",
      "13:19:00 [DEBUG] verify episode 1285: reward = 113.00, steps = 113\n",
      "13:19:05 [DEBUG] verify episode 1286: reward = 106.00, steps = 106\n",
      "13:19:16 [DEBUG] verify episode 1287: reward = 160.00, steps = 160\n",
      "13:19:24 [DEBUG] verify episode 1288: reward = 104.00, steps = 104\n",
      "13:19:29 [DEBUG] verify episode 1289: reward = 86.00, steps = 86\n",
      "13:19:36 [DEBUG] verify episode 1290: reward = 96.00, steps = 96\n",
      "13:19:43 [DEBUG] verify episode 1291: reward = 120.00, steps = 120\n",
      "13:19:49 [DEBUG] verify episode 1292: reward = 111.00, steps = 111\n",
      "13:19:57 [DEBUG] verify episode 1293: reward = 129.00, steps = 129\n",
      "13:20:05 [DEBUG] verify episode 1294: reward = 131.00, steps = 131\n",
      "13:20:13 [DEBUG] verify episode 1295: reward = 143.00, steps = 143\n",
      "13:20:21 [DEBUG] verify episode 1296: reward = 126.00, steps = 126\n",
      "13:20:31 [DEBUG] verify episode 1297: reward = 167.00, steps = 167\n",
      "13:20:39 [DEBUG] verify episode 1298: reward = 135.00, steps = 135\n",
      "13:20:46 [DEBUG] verify episode 1299: reward = 117.00, steps = 117\n",
      "13:20:53 [DEBUG] verify episode 1300: reward = 126.00, steps = 126\n",
      "13:21:00 [DEBUG] verify episode 1301: reward = 106.00, steps = 106\n",
      "13:21:06 [DEBUG] verify episode 1302: reward = 107.00, steps = 107\n",
      "13:21:14 [DEBUG] verify episode 1303: reward = 141.00, steps = 141\n",
      "13:21:21 [DEBUG] verify episode 1304: reward = 123.00, steps = 123\n",
      "13:21:28 [DEBUG] verify episode 1305: reward = 117.00, steps = 117\n",
      "13:21:36 [DEBUG] verify episode 1306: reward = 136.00, steps = 136\n",
      "13:21:46 [DEBUG] verify episode 1307: reward = 172.00, steps = 172\n",
      "13:21:56 [DEBUG] verify episode 1308: reward = 172.00, steps = 172\n",
      "13:22:03 [DEBUG] verify episode 1309: reward = 113.00, steps = 113\n",
      "13:22:11 [DEBUG] verify episode 1310: reward = 142.00, steps = 142\n",
      "13:22:18 [DEBUG] verify episode 1311: reward = 132.00, steps = 132\n",
      "13:22:23 [DEBUG] verify episode 1312: reward = 92.00, steps = 92\n",
      "13:22:31 [DEBUG] verify episode 1313: reward = 133.00, steps = 133\n",
      "13:22:41 [DEBUG] verify episode 1314: reward = 148.00, steps = 148\n",
      "13:22:48 [DEBUG] verify episode 1315: reward = 120.00, steps = 120\n",
      "13:22:58 [DEBUG] verify episode 1316: reward = 142.00, steps = 142\n",
      "13:23:07 [DEBUG] verify episode 1317: reward = 147.00, steps = 147\n",
      "13:23:14 [DEBUG] verify episode 1318: reward = 133.00, steps = 133\n",
      "13:23:27 [DEBUG] verify episode 1319: reward = 200.00, steps = 200\n",
      "13:23:39 [DEBUG] verify episode 1320: reward = 187.00, steps = 187\n",
      "13:23:49 [DEBUG] verify episode 1321: reward = 187.00, steps = 187\n",
      "13:23:58 [DEBUG] verify episode 1322: reward = 163.00, steps = 163\n",
      "13:24:08 [DEBUG] verify episode 1323: reward = 187.00, steps = 187\n",
      "13:24:18 [DEBUG] verify episode 1324: reward = 182.00, steps = 182\n",
      "13:24:28 [DEBUG] verify episode 1325: reward = 195.00, steps = 195\n",
      "13:24:37 [DEBUG] verify episode 1326: reward = 151.00, steps = 151\n",
      "13:24:43 [DEBUG] verify episode 1327: reward = 117.00, steps = 117\n",
      "13:24:53 [DEBUG] verify episode 1328: reward = 181.00, steps = 181\n",
      "13:25:01 [DEBUG] verify episode 1329: reward = 147.00, steps = 147\n",
      "13:25:08 [DEBUG] verify episode 1330: reward = 122.00, steps = 122\n",
      "13:25:19 [DEBUG] verify episode 1331: reward = 200.00, steps = 200\n",
      "13:25:30 [DEBUG] verify episode 1332: reward = 200.00, steps = 200\n",
      "13:25:38 [DEBUG] verify episode 1333: reward = 154.00, steps = 154\n",
      "13:25:46 [DEBUG] verify episode 1334: reward = 144.00, steps = 144\n",
      "13:25:53 [DEBUG] verify episode 1335: reward = 132.00, steps = 132\n",
      "13:26:04 [DEBUG] verify episode 1336: reward = 200.00, steps = 200\n",
      "13:26:11 [DEBUG] verify episode 1337: reward = 137.00, steps = 137\n",
      "13:26:21 [DEBUG] verify episode 1338: reward = 166.00, steps = 166\n",
      "13:26:31 [DEBUG] verify episode 1339: reward = 200.00, steps = 200\n",
      "13:26:38 [DEBUG] verify episode 1340: reward = 125.00, steps = 125\n",
      "13:26:45 [DEBUG] verify episode 1341: reward = 131.00, steps = 131\n",
      "13:26:53 [DEBUG] verify episode 1342: reward = 153.00, steps = 153\n",
      "13:27:02 [DEBUG] verify episode 1343: reward = 164.00, steps = 164\n",
      "13:27:08 [DEBUG] verify episode 1344: reward = 107.00, steps = 107\n",
      "13:27:16 [DEBUG] verify episode 1345: reward = 143.00, steps = 143\n",
      "13:27:25 [DEBUG] verify episode 1346: reward = 160.00, steps = 160\n",
      "13:27:32 [DEBUG] verify episode 1347: reward = 127.00, steps = 127\n",
      "13:27:41 [DEBUG] verify episode 1348: reward = 165.00, steps = 165\n",
      "13:27:50 [DEBUG] verify episode 1349: reward = 173.00, steps = 173\n",
      "13:28:00 [DEBUG] verify episode 1350: reward = 173.00, steps = 173\n",
      "13:28:10 [DEBUG] verify episode 1351: reward = 200.00, steps = 200\n",
      "13:28:17 [DEBUG] verify episode 1352: reward = 120.00, steps = 120\n",
      "13:28:23 [DEBUG] verify episode 1353: reward = 113.00, steps = 113\n",
      "13:28:30 [DEBUG] verify episode 1354: reward = 132.00, steps = 132\n",
      "13:28:40 [DEBUG] verify episode 1355: reward = 171.00, steps = 171\n",
      "13:28:46 [DEBUG] verify episode 1356: reward = 112.00, steps = 112\n",
      "13:28:54 [DEBUG] verify episode 1357: reward = 162.00, steps = 162\n",
      "13:29:03 [DEBUG] verify episode 1358: reward = 162.00, steps = 162\n",
      "13:29:10 [DEBUG] verify episode 1359: reward = 131.00, steps = 131\n",
      "13:29:19 [DEBUG] verify episode 1360: reward = 153.00, steps = 153\n",
      "13:29:24 [DEBUG] verify episode 1361: reward = 105.00, steps = 105\n",
      "13:29:32 [DEBUG] verify episode 1362: reward = 145.00, steps = 145\n",
      "13:29:42 [DEBUG] verify episode 1363: reward = 173.00, steps = 173\n",
      "13:29:50 [DEBUG] verify episode 1364: reward = 147.00, steps = 147\n",
      "13:29:59 [DEBUG] verify episode 1365: reward = 179.00, steps = 179\n",
      "13:30:10 [DEBUG] verify episode 1366: reward = 200.00, steps = 200\n",
      "13:30:19 [DEBUG] verify episode 1367: reward = 168.00, steps = 168\n",
      "13:30:27 [DEBUG] verify episode 1368: reward = 144.00, steps = 144\n",
      "13:30:33 [DEBUG] verify episode 1369: reward = 110.00, steps = 110\n",
      "13:30:39 [DEBUG] verify episode 1370: reward = 102.00, steps = 102\n",
      "13:30:46 [DEBUG] verify episode 1371: reward = 136.00, steps = 136\n",
      "13:30:57 [DEBUG] verify episode 1372: reward = 200.00, steps = 200\n",
      "13:31:06 [DEBUG] verify episode 1373: reward = 165.00, steps = 165\n",
      "13:31:16 [DEBUG] verify episode 1374: reward = 187.00, steps = 187\n",
      "13:31:23 [DEBUG] verify episode 1375: reward = 126.00, steps = 126\n",
      "13:31:30 [DEBUG] verify episode 1376: reward = 124.00, steps = 124\n",
      "13:31:36 [DEBUG] verify episode 1377: reward = 104.00, steps = 104\n",
      "13:31:44 [DEBUG] verify episode 1378: reward = 163.00, steps = 163\n",
      "13:31:51 [DEBUG] verify episode 1379: reward = 126.00, steps = 126\n",
      "13:31:58 [DEBUG] verify episode 1380: reward = 120.00, steps = 120\n",
      "13:32:04 [DEBUG] verify episode 1381: reward = 106.00, steps = 106\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "13:32:10 [DEBUG] verify episode 1382: reward = 115.00, steps = 115\n",
      "13:32:19 [DEBUG] verify episode 1383: reward = 166.00, steps = 166\n",
      "13:32:26 [DEBUG] verify episode 1384: reward = 123.00, steps = 123\n",
      "13:32:37 [DEBUG] verify episode 1385: reward = 200.00, steps = 200\n",
      "13:32:45 [DEBUG] verify episode 1386: reward = 154.00, steps = 154\n",
      "13:32:53 [DEBUG] verify episode 1387: reward = 132.00, steps = 132\n",
      "13:33:03 [DEBUG] verify episode 1388: reward = 200.00, steps = 200\n",
      "13:33:13 [DEBUG] verify episode 1389: reward = 169.00, steps = 169\n",
      "13:33:23 [DEBUG] verify episode 1390: reward = 200.00, steps = 200\n",
      "13:33:30 [DEBUG] verify episode 1391: reward = 114.00, steps = 114\n",
      "13:33:37 [DEBUG] verify episode 1392: reward = 132.00, steps = 132\n",
      "13:33:45 [DEBUG] verify episode 1393: reward = 148.00, steps = 148\n",
      "13:33:52 [DEBUG] verify episode 1394: reward = 132.00, steps = 132\n",
      "13:34:03 [DEBUG] verify episode 1395: reward = 189.00, steps = 189\n",
      "13:34:13 [DEBUG] verify episode 1396: reward = 152.00, steps = 152\n",
      "13:34:24 [DEBUG] verify episode 1397: reward = 158.00, steps = 158\n",
      "13:34:29 [DEBUG] verify episode 1398: reward = 90.00, steps = 90\n",
      "13:34:42 [DEBUG] verify episode 1399: reward = 200.00, steps = 200\n",
      "13:34:52 [DEBUG] verify episode 1400: reward = 149.00, steps = 149\n",
      "13:35:02 [DEBUG] verify episode 1401: reward = 143.00, steps = 143\n",
      "13:35:12 [DEBUG] verify episode 1402: reward = 163.00, steps = 163\n",
      "13:35:19 [DEBUG] verify episode 1403: reward = 109.00, steps = 109\n",
      "13:35:29 [DEBUG] verify episode 1404: reward = 149.00, steps = 149\n",
      "13:35:37 [DEBUG] verify episode 1405: reward = 134.00, steps = 134\n",
      "13:35:43 [DEBUG] verify episode 1406: reward = 98.00, steps = 98\n",
      "13:35:52 [DEBUG] verify episode 1407: reward = 144.00, steps = 144\n",
      "13:36:01 [DEBUG] verify episode 1408: reward = 146.00, steps = 146\n",
      "13:36:11 [DEBUG] verify episode 1409: reward = 170.00, steps = 170\n",
      "13:36:21 [DEBUG] verify episode 1410: reward = 157.00, steps = 157\n",
      "13:36:31 [DEBUG] verify episode 1411: reward = 146.00, steps = 146\n",
      "13:36:44 [DEBUG] verify episode 1412: reward = 189.00, steps = 189\n",
      "13:36:52 [DEBUG] verify episode 1413: reward = 112.00, steps = 112\n",
      "13:37:01 [DEBUG] verify episode 1414: reward = 162.00, steps = 162\n",
      "13:37:09 [DEBUG] verify episode 1415: reward = 133.00, steps = 133\n",
      "13:37:17 [DEBUG] verify episode 1416: reward = 153.00, steps = 153\n",
      "13:37:24 [DEBUG] verify episode 1417: reward = 122.00, steps = 122\n",
      "13:37:31 [DEBUG] verify episode 1418: reward = 119.00, steps = 119\n",
      "13:37:41 [DEBUG] verify episode 1419: reward = 175.00, steps = 175\n",
      "13:37:50 [DEBUG] verify episode 1420: reward = 181.00, steps = 181\n",
      "13:37:58 [DEBUG] verify episode 1421: reward = 152.00, steps = 152\n",
      "13:38:04 [DEBUG] verify episode 1422: reward = 134.00, steps = 134\n",
      "13:38:12 [DEBUG] verify episode 1423: reward = 158.00, steps = 158\n",
      "13:38:20 [DEBUG] verify episode 1424: reward = 149.00, steps = 149\n",
      "13:38:26 [DEBUG] verify episode 1425: reward = 132.00, steps = 132\n",
      "13:38:34 [DEBUG] verify episode 1426: reward = 156.00, steps = 156\n",
      "13:38:44 [DEBUG] verify episode 1427: reward = 173.00, steps = 173\n",
      "13:38:51 [DEBUG] verify episode 1428: reward = 122.00, steps = 122\n",
      "13:39:04 [DEBUG] verify episode 1429: reward = 200.00, steps = 200\n",
      "13:39:14 [DEBUG] verify episode 1430: reward = 135.00, steps = 135\n",
      "13:39:24 [DEBUG] verify episode 1431: reward = 115.00, steps = 115\n",
      "13:39:33 [DEBUG] verify episode 1432: reward = 119.00, steps = 119\n",
      "13:39:47 [DEBUG] verify episode 1433: reward = 196.00, steps = 196\n",
      "13:39:56 [DEBUG] verify episode 1434: reward = 157.00, steps = 157\n",
      "13:40:08 [DEBUG] verify episode 1435: reward = 200.00, steps = 200\n",
      "13:40:16 [DEBUG] verify episode 1436: reward = 152.00, steps = 152\n",
      "13:40:27 [DEBUG] verify episode 1437: reward = 188.00, steps = 188\n",
      "13:40:37 [DEBUG] verify episode 1438: reward = 167.00, steps = 167\n",
      "13:40:46 [DEBUG] verify episode 1439: reward = 128.00, steps = 128\n",
      "13:40:54 [DEBUG] verify episode 1440: reward = 156.00, steps = 156\n",
      "13:41:01 [DEBUG] verify episode 1441: reward = 125.00, steps = 125\n",
      "13:41:08 [DEBUG] verify episode 1442: reward = 128.00, steps = 128\n",
      "13:41:18 [DEBUG] verify episode 1443: reward = 178.00, steps = 178\n",
      "13:41:28 [DEBUG] verify episode 1444: reward = 181.00, steps = 181\n",
      "13:41:35 [DEBUG] verify episode 1445: reward = 148.00, steps = 148\n",
      "13:41:41 [DEBUG] verify episode 1446: reward = 104.00, steps = 104\n",
      "13:41:50 [DEBUG] verify episode 1447: reward = 184.00, steps = 184\n",
      "13:41:55 [DEBUG] verify episode 1448: reward = 117.00, steps = 117\n",
      "13:42:02 [DEBUG] verify episode 1449: reward = 136.00, steps = 136\n",
      "13:42:09 [DEBUG] verify episode 1450: reward = 140.00, steps = 140\n",
      "13:42:17 [DEBUG] verify episode 1451: reward = 150.00, steps = 150\n",
      "13:42:24 [DEBUG] verify episode 1452: reward = 134.00, steps = 134\n",
      "13:42:31 [DEBUG] verify episode 1453: reward = 155.00, steps = 155\n",
      "13:42:40 [DEBUG] verify episode 1454: reward = 170.00, steps = 170\n",
      "13:42:48 [DEBUG] verify episode 1455: reward = 168.00, steps = 168\n",
      "13:42:57 [DEBUG] verify episode 1456: reward = 188.00, steps = 188\n",
      "13:43:02 [DEBUG] verify episode 1457: reward = 105.00, steps = 105\n",
      "13:43:11 [DEBUG] verify episode 1458: reward = 169.00, steps = 169\n",
      "13:43:17 [DEBUG] verify episode 1459: reward = 137.00, steps = 137\n",
      "13:43:24 [DEBUG] verify episode 1460: reward = 132.00, steps = 132\n",
      "13:43:33 [DEBUG] verify episode 1461: reward = 185.00, steps = 185\n",
      "13:43:41 [DEBUG] verify episode 1462: reward = 160.00, steps = 160\n",
      "13:43:46 [DEBUG] verify episode 1463: reward = 121.00, steps = 121\n",
      "13:43:56 [DEBUG] verify episode 1464: reward = 200.00, steps = 200\n",
      "13:44:01 [DEBUG] verify episode 1465: reward = 98.00, steps = 98\n",
      "13:44:09 [DEBUG] verify episode 1466: reward = 159.00, steps = 159\n",
      "13:44:15 [DEBUG] verify episode 1467: reward = 126.00, steps = 126\n",
      "13:44:21 [DEBUG] verify episode 1468: reward = 128.00, steps = 128\n",
      "13:44:31 [DEBUG] verify episode 1469: reward = 200.00, steps = 200\n",
      "13:44:38 [DEBUG] verify episode 1470: reward = 140.00, steps = 140\n",
      "13:44:48 [DEBUG] verify episode 1471: reward = 200.00, steps = 200\n",
      "13:44:54 [DEBUG] verify episode 1472: reward = 124.00, steps = 124\n",
      "13:45:02 [DEBUG] verify episode 1473: reward = 164.00, steps = 164\n",
      "13:45:08 [DEBUG] verify episode 1474: reward = 121.00, steps = 121\n",
      "13:45:14 [DEBUG] verify episode 1475: reward = 115.00, steps = 115\n",
      "13:45:21 [DEBUG] verify episode 1476: reward = 152.00, steps = 152\n",
      "13:45:27 [DEBUG] verify episode 1477: reward = 112.00, steps = 112\n",
      "13:45:35 [DEBUG] verify episode 1478: reward = 173.00, steps = 173\n",
      "13:45:43 [DEBUG] verify episode 1479: reward = 146.00, steps = 146\n",
      "13:45:50 [DEBUG] verify episode 1480: reward = 141.00, steps = 141\n",
      "13:45:56 [DEBUG] verify episode 1481: reward = 122.00, steps = 122\n",
      "13:46:05 [DEBUG] verify episode 1482: reward = 200.00, steps = 200\n",
      "13:46:12 [DEBUG] verify episode 1483: reward = 128.00, steps = 128\n",
      "13:46:22 [DEBUG] verify episode 1484: reward = 198.00, steps = 198\n",
      "13:46:31 [DEBUG] verify episode 1485: reward = 200.00, steps = 200\n",
      "13:46:39 [DEBUG] verify episode 1486: reward = 165.00, steps = 165\n",
      "13:46:46 [DEBUG] verify episode 1487: reward = 138.00, steps = 138\n",
      "13:46:56 [DEBUG] verify episode 1488: reward = 200.00, steps = 200\n",
      "13:47:06 [DEBUG] verify episode 1489: reward = 195.00, steps = 195\n",
      "13:47:15 [DEBUG] verify episode 1490: reward = 193.00, steps = 193\n",
      "13:47:24 [DEBUG] verify episode 1491: reward = 177.00, steps = 177\n",
      "13:47:34 [DEBUG] verify episode 1492: reward = 187.00, steps = 187\n",
      "13:47:42 [DEBUG] verify episode 1493: reward = 161.00, steps = 161\n",
      "13:47:48 [DEBUG] verify episode 1494: reward = 111.00, steps = 111\n",
      "13:47:56 [DEBUG] verify episode 1495: reward = 148.00, steps = 148\n",
      "13:48:06 [DEBUG] verify episode 1496: reward = 200.00, steps = 200\n",
      "13:48:14 [DEBUG] verify episode 1497: reward = 157.00, steps = 157\n",
      "13:48:22 [DEBUG] verify episode 1498: reward = 152.00, steps = 152\n",
      "13:48:30 [DEBUG] verify episode 1499: reward = 140.00, steps = 140\n",
      "13:48:37 [DEBUG] verify episode 1500: reward = 136.00, steps = 136\n",
      "13:48:45 [DEBUG] verify episode 1501: reward = 149.00, steps = 149\n",
      "13:48:55 [DEBUG] verify episode 1502: reward = 200.00, steps = 200\n",
      "13:49:06 [DEBUG] verify episode 1503: reward = 200.00, steps = 200\n",
      "13:49:15 [DEBUG] verify episode 1504: reward = 174.00, steps = 174\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "13:49:23 [DEBUG] verify episode 1505: reward = 161.00, steps = 161\n",
      "13:49:30 [DEBUG] verify episode 1506: reward = 137.00, steps = 137\n",
      "13:49:38 [DEBUG] verify episode 1507: reward = 139.00, steps = 139\n",
      "13:49:44 [DEBUG] verify episode 1508: reward = 122.00, steps = 122\n",
      "13:49:55 [DEBUG] verify episode 1509: reward = 200.00, steps = 200\n",
      "13:50:05 [DEBUG] verify episode 1510: reward = 200.00, steps = 200\n",
      "13:50:12 [DEBUG] verify episode 1511: reward = 127.00, steps = 127\n",
      "13:50:18 [DEBUG] verify episode 1512: reward = 125.00, steps = 125\n",
      "13:50:26 [DEBUG] verify episode 1513: reward = 152.00, steps = 152\n",
      "13:50:33 [DEBUG] verify episode 1514: reward = 118.00, steps = 118\n",
      "13:50:43 [DEBUG] verify episode 1515: reward = 197.00, steps = 197\n",
      "13:50:52 [DEBUG] verify episode 1516: reward = 169.00, steps = 169\n",
      "13:51:01 [DEBUG] verify episode 1517: reward = 183.00, steps = 183\n",
      "13:51:11 [DEBUG] verify episode 1518: reward = 189.00, steps = 189\n",
      "13:51:17 [DEBUG] verify episode 1519: reward = 110.00, steps = 110\n",
      "13:51:25 [DEBUG] verify episode 1520: reward = 152.00, steps = 152\n",
      "13:51:34 [DEBUG] verify episode 1521: reward = 176.00, steps = 176\n",
      "13:51:43 [DEBUG] verify episode 1522: reward = 162.00, steps = 162\n",
      "13:51:50 [DEBUG] verify episode 1523: reward = 143.00, steps = 143\n",
      "13:51:56 [DEBUG] verify episode 1524: reward = 113.00, steps = 113\n",
      "13:52:05 [DEBUG] verify episode 1525: reward = 179.00, steps = 179\n",
      "13:52:14 [DEBUG] verify episode 1526: reward = 163.00, steps = 163\n",
      "13:52:23 [DEBUG] verify episode 1527: reward = 170.00, steps = 170\n",
      "13:52:34 [DEBUG] verify episode 1528: reward = 200.00, steps = 200\n",
      "13:52:43 [DEBUG] verify episode 1529: reward = 173.00, steps = 173\n",
      "13:52:51 [DEBUG] verify episode 1530: reward = 160.00, steps = 160\n",
      "13:52:58 [DEBUG] verify episode 1531: reward = 140.00, steps = 140\n",
      "13:53:05 [DEBUG] verify episode 1532: reward = 126.00, steps = 126\n",
      "13:53:12 [DEBUG] verify episode 1533: reward = 144.00, steps = 144\n",
      "13:53:19 [DEBUG] verify episode 1534: reward = 132.00, steps = 132\n",
      "13:53:29 [DEBUG] verify episode 1535: reward = 178.00, steps = 178\n",
      "13:53:39 [DEBUG] verify episode 1536: reward = 189.00, steps = 189\n",
      "13:53:49 [DEBUG] verify episode 1537: reward = 200.00, steps = 200\n",
      "13:53:59 [DEBUG] verify episode 1538: reward = 200.00, steps = 200\n",
      "13:54:06 [DEBUG] verify episode 1539: reward = 123.00, steps = 123\n",
      "13:54:12 [DEBUG] verify episode 1540: reward = 113.00, steps = 113\n",
      "13:54:19 [DEBUG] verify episode 1541: reward = 141.00, steps = 141\n",
      "13:54:27 [DEBUG] verify episode 1542: reward = 146.00, steps = 146\n",
      "13:54:38 [DEBUG] verify episode 1543: reward = 200.00, steps = 200\n",
      "13:54:45 [DEBUG] verify episode 1544: reward = 137.00, steps = 137\n",
      "13:54:54 [DEBUG] verify episode 1545: reward = 186.00, steps = 186\n",
      "13:55:03 [DEBUG] verify episode 1546: reward = 159.00, steps = 159\n",
      "13:55:13 [DEBUG] verify episode 1547: reward = 199.00, steps = 199\n",
      "13:55:21 [DEBUG] verify episode 1548: reward = 144.00, steps = 144\n",
      "13:55:28 [DEBUG] verify episode 1549: reward = 141.00, steps = 141\n",
      "13:55:39 [DEBUG] verify episode 1550: reward = 200.00, steps = 200\n",
      "13:55:49 [DEBUG] verify episode 1551: reward = 200.00, steps = 200\n",
      "13:56:00 [DEBUG] verify episode 1552: reward = 200.00, steps = 200\n",
      "13:56:10 [DEBUG] verify episode 1553: reward = 200.00, steps = 200\n",
      "13:56:19 [DEBUG] verify episode 1554: reward = 174.00, steps = 174\n",
      "13:56:26 [DEBUG] verify episode 1555: reward = 129.00, steps = 129\n",
      "13:56:33 [DEBUG] verify episode 1556: reward = 140.00, steps = 140\n",
      "13:56:44 [DEBUG] verify episode 1557: reward = 200.00, steps = 200\n",
      "13:56:50 [DEBUG] verify episode 1558: reward = 122.00, steps = 122\n",
      "13:57:01 [DEBUG] verify episode 1559: reward = 200.00, steps = 200\n",
      "13:57:11 [DEBUG] verify episode 1560: reward = 200.00, steps = 200\n",
      "13:57:18 [DEBUG] verify episode 1561: reward = 123.00, steps = 123\n",
      "13:57:27 [DEBUG] verify episode 1562: reward = 178.00, steps = 178\n",
      "13:57:33 [DEBUG] verify episode 1563: reward = 106.00, steps = 106\n",
      "13:57:40 [DEBUG] verify episode 1564: reward = 141.00, steps = 141\n",
      "13:57:50 [DEBUG] verify episode 1565: reward = 200.00, steps = 200\n",
      "13:57:58 [DEBUG] verify episode 1566: reward = 145.00, steps = 145\n",
      "13:58:05 [DEBUG] verify episode 1567: reward = 139.00, steps = 139\n",
      "13:58:13 [DEBUG] verify episode 1568: reward = 148.00, steps = 148\n",
      "13:58:22 [DEBUG] verify episode 1569: reward = 169.00, steps = 169\n",
      "13:58:29 [DEBUG] verify episode 1570: reward = 140.00, steps = 140\n",
      "13:58:39 [DEBUG] verify episode 1571: reward = 176.00, steps = 176\n",
      "13:58:46 [DEBUG] verify episode 1572: reward = 139.00, steps = 139\n",
      "13:58:53 [DEBUG] verify episode 1573: reward = 129.00, steps = 129\n",
      "13:59:01 [DEBUG] verify episode 1574: reward = 146.00, steps = 146\n",
      "13:59:08 [DEBUG] verify episode 1575: reward = 132.00, steps = 132\n",
      "13:59:14 [DEBUG] verify episode 1576: reward = 121.00, steps = 121\n",
      "13:59:21 [DEBUG] verify episode 1577: reward = 136.00, steps = 136\n",
      "13:59:32 [DEBUG] verify episode 1578: reward = 200.00, steps = 200\n",
      "13:59:37 [DEBUG] verify episode 1579: reward = 107.00, steps = 107\n",
      "13:59:44 [DEBUG] verify episode 1580: reward = 132.00, steps = 132\n",
      "13:59:54 [DEBUG] verify episode 1581: reward = 181.00, steps = 181\n",
      "13:59:59 [DEBUG] verify episode 1582: reward = 109.00, steps = 109\n",
      "14:00:09 [DEBUG] verify episode 1583: reward = 184.00, steps = 184\n",
      "14:00:19 [DEBUG] verify episode 1584: reward = 193.00, steps = 193\n",
      "14:00:26 [DEBUG] verify episode 1585: reward = 132.00, steps = 132\n",
      "14:00:34 [DEBUG] verify episode 1586: reward = 157.00, steps = 157\n",
      "14:00:43 [DEBUG] verify episode 1587: reward = 158.00, steps = 158\n",
      "14:00:53 [DEBUG] verify episode 1588: reward = 184.00, steps = 184\n",
      "14:01:03 [DEBUG] verify episode 1589: reward = 200.00, steps = 200\n",
      "14:01:10 [DEBUG] verify episode 1590: reward = 129.00, steps = 129\n",
      "14:01:17 [DEBUG] verify episode 1591: reward = 144.00, steps = 144\n",
      "14:01:25 [DEBUG] verify episode 1592: reward = 148.00, steps = 148\n",
      "14:01:35 [DEBUG] verify episode 1593: reward = 188.00, steps = 188\n",
      "14:01:42 [DEBUG] verify episode 1594: reward = 123.00, steps = 123\n",
      "14:01:50 [DEBUG] verify episode 1595: reward = 160.00, steps = 160\n",
      "14:01:56 [DEBUG] verify episode 1596: reward = 111.00, steps = 111\n",
      "14:02:04 [DEBUG] verify episode 1597: reward = 145.00, steps = 145\n",
      "14:02:14 [DEBUG] verify episode 1598: reward = 195.00, steps = 195\n",
      "14:02:24 [DEBUG] verify episode 1599: reward = 200.00, steps = 200\n",
      "14:02:33 [DEBUG] verify episode 1600: reward = 164.00, steps = 164\n",
      "14:02:39 [DEBUG] verify episode 1601: reward = 122.00, steps = 122\n",
      "14:02:47 [DEBUG] verify episode 1602: reward = 151.00, steps = 151\n",
      "14:02:58 [DEBUG] verify episode 1603: reward = 200.00, steps = 200\n",
      "14:03:04 [DEBUG] verify episode 1604: reward = 118.00, steps = 118\n",
      "14:03:12 [DEBUG] verify episode 1605: reward = 141.00, steps = 141\n",
      "14:03:20 [DEBUG] verify episode 1606: reward = 162.00, steps = 162\n",
      "14:03:28 [DEBUG] verify episode 1607: reward = 145.00, steps = 145\n",
      "14:03:37 [DEBUG] verify episode 1608: reward = 178.00, steps = 178\n",
      "14:03:45 [DEBUG] verify episode 1609: reward = 145.00, steps = 145\n",
      "14:03:55 [DEBUG] verify episode 1610: reward = 200.00, steps = 200\n",
      "14:04:02 [DEBUG] verify episode 1611: reward = 138.00, steps = 138\n",
      "14:04:10 [DEBUG] verify episode 1612: reward = 144.00, steps = 144\n",
      "14:04:20 [DEBUG] verify episode 1613: reward = 200.00, steps = 200\n",
      "14:04:28 [DEBUG] verify episode 1614: reward = 138.00, steps = 138\n",
      "14:04:38 [DEBUG] verify episode 1615: reward = 187.00, steps = 187\n",
      "14:04:44 [DEBUG] verify episode 1616: reward = 115.00, steps = 115\n",
      "14:04:50 [DEBUG] verify episode 1617: reward = 119.00, steps = 119\n",
      "14:04:58 [DEBUG] verify episode 1618: reward = 150.00, steps = 150\n",
      "14:05:09 [DEBUG] verify episode 1619: reward = 200.00, steps = 200\n",
      "14:05:19 [DEBUG] verify episode 1620: reward = 200.00, steps = 200\n",
      "14:05:30 [DEBUG] verify episode 1621: reward = 200.00, steps = 200\n",
      "14:05:38 [DEBUG] verify episode 1622: reward = 155.00, steps = 155\n",
      "14:05:45 [DEBUG] verify episode 1623: reward = 142.00, steps = 142\n",
      "14:05:53 [DEBUG] verify episode 1624: reward = 140.00, steps = 140\n",
      "14:05:59 [DEBUG] verify episode 1625: reward = 132.00, steps = 132\n",
      "14:06:06 [DEBUG] verify episode 1626: reward = 120.00, steps = 120\n",
      "14:06:13 [DEBUG] verify episode 1627: reward = 132.00, steps = 132\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14:06:23 [DEBUG] verify episode 1628: reward = 200.00, steps = 200\n",
      "14:06:29 [DEBUG] verify episode 1629: reward = 113.00, steps = 113\n",
      "14:06:40 [DEBUG] verify episode 1630: reward = 195.00, steps = 195\n",
      "14:06:45 [DEBUG] verify episode 1631: reward = 106.00, steps = 106\n",
      "14:06:51 [DEBUG] verify episode 1632: reward = 105.00, steps = 105\n",
      "14:07:01 [DEBUG] verify episode 1633: reward = 189.00, steps = 189\n",
      "14:07:09 [DEBUG] verify episode 1634: reward = 166.00, steps = 166\n",
      "14:07:19 [DEBUG] verify episode 1635: reward = 179.00, steps = 179\n",
      "14:07:27 [DEBUG] verify episode 1636: reward = 154.00, steps = 154\n",
      "14:07:35 [DEBUG] verify episode 1637: reward = 151.00, steps = 151\n",
      "14:07:45 [DEBUG] verify episode 1638: reward = 200.00, steps = 200\n",
      "14:07:56 [DEBUG] verify episode 1639: reward = 200.00, steps = 200\n",
      "14:08:02 [DEBUG] verify episode 1640: reward = 118.00, steps = 118\n",
      "14:08:13 [DEBUG] verify episode 1641: reward = 200.00, steps = 200\n",
      "14:08:23 [DEBUG] verify episode 1642: reward = 191.00, steps = 191\n",
      "14:08:30 [DEBUG] verify episode 1643: reward = 138.00, steps = 138\n",
      "14:08:37 [DEBUG] verify episode 1644: reward = 133.00, steps = 133\n",
      "14:08:44 [DEBUG] verify episode 1645: reward = 127.00, steps = 127\n",
      "14:08:53 [DEBUG] verify episode 1646: reward = 178.00, steps = 178\n",
      "14:08:58 [DEBUG] verify episode 1647: reward = 97.00, steps = 97\n",
      "14:09:08 [DEBUG] verify episode 1648: reward = 186.00, steps = 186\n",
      "14:09:16 [DEBUG] verify episode 1649: reward = 155.00, steps = 155\n",
      "14:09:22 [DEBUG] verify episode 1650: reward = 117.00, steps = 117\n",
      "14:09:32 [DEBUG] verify episode 1651: reward = 179.00, steps = 179\n",
      "14:09:40 [DEBUG] verify episode 1652: reward = 154.00, steps = 154\n",
      "14:09:49 [DEBUG] verify episode 1653: reward = 140.00, steps = 140\n",
      "14:09:58 [DEBUG] verify episode 1654: reward = 175.00, steps = 175\n",
      "14:10:09 [DEBUG] verify episode 1655: reward = 199.00, steps = 199\n",
      "14:10:18 [DEBUG] verify episode 1656: reward = 186.00, steps = 186\n",
      "14:10:29 [DEBUG] verify episode 1657: reward = 200.00, steps = 200\n",
      "14:10:36 [DEBUG] verify episode 1658: reward = 136.00, steps = 136\n",
      "14:10:44 [DEBUG] verify episode 1659: reward = 144.00, steps = 144\n",
      "14:10:52 [DEBUG] verify episode 1660: reward = 157.00, steps = 157\n",
      "14:10:59 [DEBUG] verify episode 1661: reward = 136.00, steps = 136\n",
      "14:11:07 [DEBUG] verify episode 1662: reward = 132.00, steps = 132\n",
      "14:11:14 [DEBUG] verify episode 1663: reward = 138.00, steps = 138\n",
      "14:11:23 [DEBUG] verify episode 1664: reward = 165.00, steps = 165\n",
      "14:11:33 [DEBUG] verify episode 1665: reward = 195.00, steps = 195\n",
      "14:11:43 [DEBUG] verify episode 1666: reward = 200.00, steps = 200\n",
      "14:11:50 [DEBUG] verify episode 1667: reward = 132.00, steps = 132\n",
      "14:12:01 [DEBUG] verify episode 1668: reward = 200.00, steps = 200\n",
      "14:12:11 [DEBUG] verify episode 1669: reward = 187.00, steps = 187\n",
      "14:12:18 [DEBUG] verify episode 1670: reward = 139.00, steps = 139\n",
      "14:12:25 [DEBUG] verify episode 1671: reward = 140.00, steps = 140\n",
      "14:12:33 [DEBUG] verify episode 1672: reward = 156.00, steps = 156\n",
      "14:12:41 [DEBUG] verify episode 1673: reward = 134.00, steps = 134\n",
      "14:12:51 [DEBUG] verify episode 1674: reward = 200.00, steps = 200\n",
      "14:12:57 [DEBUG] verify episode 1675: reward = 118.00, steps = 118\n",
      "14:13:04 [DEBUG] verify episode 1676: reward = 120.00, steps = 120\n",
      "14:13:12 [DEBUG] verify episode 1677: reward = 168.00, steps = 168\n",
      "14:13:22 [DEBUG] verify episode 1678: reward = 163.00, steps = 163\n",
      "14:13:30 [DEBUG] verify episode 1679: reward = 137.00, steps = 137\n",
      "14:13:38 [DEBUG] verify episode 1680: reward = 158.00, steps = 158\n",
      "14:13:47 [DEBUG] verify episode 1681: reward = 161.00, steps = 161\n",
      "14:13:55 [DEBUG] verify episode 1682: reward = 134.00, steps = 134\n",
      "14:14:06 [DEBUG] verify episode 1683: reward = 200.00, steps = 200\n",
      "14:14:14 [DEBUG] verify episode 1684: reward = 153.00, steps = 153\n",
      "14:14:23 [DEBUG] verify episode 1685: reward = 166.00, steps = 166\n",
      "14:14:36 [DEBUG] verify episode 1686: reward = 200.00, steps = 200\n",
      "14:14:46 [DEBUG] verify episode 1687: reward = 171.00, steps = 171\n",
      "14:14:52 [DEBUG] verify episode 1688: reward = 110.00, steps = 110\n",
      "14:14:59 [DEBUG] verify episode 1689: reward = 126.00, steps = 126\n",
      "14:15:07 [DEBUG] verify episode 1690: reward = 148.00, steps = 148\n",
      "14:15:17 [DEBUG] verify episode 1691: reward = 170.00, steps = 170\n",
      "14:15:24 [DEBUG] verify episode 1692: reward = 121.00, steps = 121\n",
      "14:15:36 [DEBUG] verify episode 1693: reward = 200.00, steps = 200\n",
      "14:15:48 [DEBUG] verify episode 1694: reward = 178.00, steps = 178\n",
      "14:15:54 [DEBUG] verify episode 1695: reward = 116.00, steps = 116\n",
      "14:16:01 [DEBUG] verify episode 1696: reward = 126.00, steps = 126\n",
      "14:16:08 [DEBUG] verify episode 1697: reward = 122.00, steps = 122\n",
      "14:16:15 [DEBUG] verify episode 1698: reward = 124.00, steps = 124\n",
      "14:16:26 [DEBUG] verify episode 1699: reward = 200.00, steps = 200\n",
      "14:16:37 [DEBUG] verify episode 1700: reward = 193.00, steps = 193\n",
      "14:16:50 [DEBUG] verify episode 1701: reward = 200.00, steps = 200\n",
      "14:17:02 [DEBUG] verify episode 1702: reward = 189.00, steps = 189\n",
      "14:17:12 [DEBUG] verify episode 1703: reward = 200.00, steps = 200\n",
      "14:17:22 [DEBUG] verify episode 1704: reward = 177.00, steps = 177\n",
      "14:17:32 [DEBUG] verify episode 1705: reward = 200.00, steps = 200\n",
      "14:17:41 [DEBUG] verify episode 1706: reward = 161.00, steps = 161\n",
      "14:17:47 [DEBUG] verify episode 1707: reward = 128.00, steps = 128\n",
      "14:17:55 [DEBUG] verify episode 1708: reward = 133.00, steps = 133\n",
      "14:18:05 [DEBUG] verify episode 1709: reward = 200.00, steps = 200\n",
      "14:18:13 [DEBUG] verify episode 1710: reward = 150.00, steps = 150\n",
      "14:18:21 [DEBUG] verify episode 1711: reward = 142.00, steps = 142\n",
      "14:18:27 [DEBUG] verify episode 1712: reward = 125.00, steps = 125\n",
      "14:18:34 [DEBUG] verify episode 1713: reward = 131.00, steps = 131\n",
      "14:18:43 [DEBUG] verify episode 1714: reward = 160.00, steps = 160\n",
      "14:18:49 [DEBUG] verify episode 1715: reward = 127.00, steps = 127\n",
      "14:18:55 [DEBUG] verify episode 1716: reward = 113.00, steps = 113\n",
      "14:19:03 [DEBUG] verify episode 1717: reward = 137.00, steps = 137\n",
      "14:19:10 [DEBUG] verify episode 1718: reward = 136.00, steps = 136\n",
      "14:19:20 [DEBUG] verify episode 1719: reward = 200.00, steps = 200\n",
      "14:19:27 [DEBUG] verify episode 1720: reward = 128.00, steps = 128\n",
      "14:19:35 [DEBUG] verify episode 1721: reward = 140.00, steps = 140\n",
      "14:19:42 [DEBUG] verify episode 1722: reward = 135.00, steps = 135\n",
      "14:19:50 [DEBUG] verify episode 1723: reward = 150.00, steps = 150\n",
      "14:20:00 [DEBUG] verify episode 1724: reward = 200.00, steps = 200\n",
      "14:20:11 [DEBUG] verify episode 1725: reward = 200.00, steps = 200\n",
      "14:20:21 [DEBUG] verify episode 1726: reward = 200.00, steps = 200\n",
      "14:20:32 [DEBUG] verify episode 1727: reward = 200.00, steps = 200\n",
      "14:20:39 [DEBUG] verify episode 1728: reward = 127.00, steps = 127\n",
      "14:20:47 [DEBUG] verify episode 1729: reward = 147.00, steps = 147\n",
      "14:20:54 [DEBUG] verify episode 1730: reward = 134.00, steps = 134\n",
      "14:21:03 [DEBUG] verify episode 1731: reward = 178.00, steps = 178\n",
      "14:21:10 [DEBUG] verify episode 1732: reward = 133.00, steps = 133\n",
      "14:21:18 [DEBUG] verify episode 1733: reward = 136.00, steps = 136\n",
      "14:21:27 [DEBUG] verify episode 1734: reward = 173.00, steps = 173\n",
      "14:21:35 [DEBUG] verify episode 1735: reward = 153.00, steps = 153\n",
      "14:21:42 [DEBUG] verify episode 1736: reward = 134.00, steps = 134\n",
      "14:21:52 [DEBUG] verify episode 1737: reward = 193.00, steps = 193\n",
      "14:21:59 [DEBUG] verify episode 1738: reward = 136.00, steps = 136\n",
      "14:22:08 [DEBUG] verify episode 1739: reward = 164.00, steps = 164\n",
      "14:22:14 [DEBUG] verify episode 1740: reward = 104.00, steps = 104\n",
      "14:22:21 [DEBUG] verify episode 1741: reward = 129.00, steps = 129\n",
      "14:22:28 [DEBUG] verify episode 1742: reward = 139.00, steps = 139\n",
      "14:22:37 [DEBUG] verify episode 1743: reward = 183.00, steps = 183\n",
      "14:22:44 [DEBUG] verify episode 1744: reward = 124.00, steps = 124\n",
      "14:22:52 [DEBUG] verify episode 1745: reward = 144.00, steps = 144\n",
      "14:22:58 [DEBUG] verify episode 1746: reward = 111.00, steps = 111\n",
      "14:23:04 [DEBUG] verify episode 1747: reward = 123.00, steps = 123\n",
      "14:23:15 [DEBUG] verify episode 1748: reward = 195.00, steps = 195\n",
      "14:23:24 [DEBUG] verify episode 1749: reward = 167.00, steps = 167\n",
      "14:23:30 [DEBUG] verify episode 1750: reward = 113.00, steps = 113\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14:23:39 [DEBUG] verify episode 1751: reward = 155.00, steps = 155\n",
      "14:23:45 [DEBUG] verify episode 1752: reward = 110.00, steps = 110\n",
      "14:23:55 [DEBUG] verify episode 1753: reward = 189.00, steps = 189\n",
      "14:24:04 [DEBUG] verify episode 1754: reward = 156.00, steps = 156\n",
      "14:24:12 [DEBUG] verify episode 1755: reward = 135.00, steps = 135\n",
      "14:24:22 [DEBUG] verify episode 1756: reward = 188.00, steps = 188\n",
      "14:24:31 [DEBUG] verify episode 1757: reward = 162.00, steps = 162\n",
      "14:24:39 [DEBUG] verify episode 1758: reward = 157.00, steps = 157\n",
      "14:24:45 [DEBUG] verify episode 1759: reward = 112.00, steps = 112\n",
      "14:24:53 [DEBUG] verify episode 1760: reward = 158.00, steps = 158\n",
      "14:25:00 [DEBUG] verify episode 1761: reward = 131.00, steps = 131\n",
      "14:25:07 [DEBUG] verify episode 1762: reward = 136.00, steps = 136\n",
      "14:25:17 [DEBUG] verify episode 1763: reward = 182.00, steps = 182\n",
      "14:25:25 [DEBUG] verify episode 1764: reward = 158.00, steps = 158\n",
      "14:25:36 [DEBUG] verify episode 1765: reward = 200.00, steps = 200\n",
      "14:25:43 [DEBUG] verify episode 1766: reward = 134.00, steps = 134\n",
      "14:25:54 [DEBUG] verify episode 1767: reward = 200.00, steps = 200\n",
      "14:26:00 [DEBUG] verify episode 1768: reward = 120.00, steps = 120\n",
      "14:26:08 [DEBUG] verify episode 1769: reward = 155.00, steps = 155\n",
      "14:26:19 [DEBUG] verify episode 1770: reward = 200.00, steps = 200\n",
      "14:26:26 [DEBUG] verify episode 1771: reward = 144.00, steps = 144\n",
      "14:26:34 [DEBUG] verify episode 1772: reward = 127.00, steps = 127\n",
      "14:26:48 [DEBUG] verify episode 1773: reward = 200.00, steps = 200\n",
      "14:27:00 [DEBUG] verify episode 1774: reward = 200.00, steps = 200\n",
      "14:27:11 [DEBUG] verify episode 1775: reward = 200.00, steps = 200\n",
      "14:27:18 [DEBUG] verify episode 1776: reward = 116.00, steps = 116\n",
      "14:27:29 [DEBUG] verify episode 1777: reward = 200.00, steps = 200\n",
      "14:27:40 [DEBUG] verify episode 1778: reward = 187.00, steps = 187\n",
      "14:27:47 [DEBUG] verify episode 1779: reward = 118.00, steps = 118\n",
      "14:27:59 [DEBUG] verify episode 1780: reward = 200.00, steps = 200\n",
      "14:28:07 [DEBUG] verify episode 1781: reward = 130.00, steps = 130\n",
      "14:28:17 [DEBUG] verify episode 1782: reward = 161.00, steps = 161\n",
      "14:28:26 [DEBUG] verify episode 1783: reward = 156.00, steps = 156\n",
      "14:28:38 [DEBUG] verify episode 1784: reward = 200.00, steps = 200\n",
      "14:28:47 [DEBUG] verify episode 1785: reward = 134.00, steps = 134\n",
      "14:28:58 [DEBUG] verify episode 1786: reward = 200.00, steps = 200\n",
      "14:29:09 [DEBUG] verify episode 1787: reward = 167.00, steps = 167\n",
      "14:29:22 [DEBUG] verify episode 1788: reward = 200.00, steps = 200\n",
      "14:29:30 [DEBUG] verify episode 1789: reward = 149.00, steps = 149\n",
      "14:29:39 [DEBUG] verify episode 1790: reward = 141.00, steps = 141\n",
      "14:29:50 [DEBUG] verify episode 1791: reward = 164.00, steps = 164\n",
      "14:30:03 [DEBUG] verify episode 1792: reward = 200.00, steps = 200\n",
      "14:30:11 [DEBUG] verify episode 1793: reward = 140.00, steps = 140\n",
      "14:30:23 [DEBUG] verify episode 1794: reward = 200.00, steps = 200\n",
      "14:30:30 [DEBUG] verify episode 1795: reward = 125.00, steps = 125\n",
      "14:30:40 [DEBUG] verify episode 1796: reward = 194.00, steps = 194\n",
      "14:30:51 [DEBUG] verify episode 1797: reward = 200.00, steps = 200\n",
      "14:30:59 [DEBUG] verify episode 1798: reward = 149.00, steps = 149\n",
      "14:31:06 [DEBUG] verify episode 1799: reward = 134.00, steps = 134\n",
      "14:31:13 [DEBUG] verify episode 1800: reward = 131.00, steps = 131\n",
      "14:31:24 [DEBUG] verify episode 1801: reward = 198.00, steps = 198\n",
      "14:31:34 [DEBUG] verify episode 1802: reward = 148.00, steps = 148\n",
      "14:31:45 [DEBUG] verify episode 1803: reward = 199.00, steps = 199\n",
      "14:31:54 [DEBUG] verify episode 1804: reward = 142.00, steps = 142\n",
      "14:32:03 [DEBUG] verify episode 1805: reward = 170.00, steps = 170\n",
      "14:32:10 [DEBUG] verify episode 1806: reward = 127.00, steps = 127\n",
      "14:32:23 [DEBUG] verify episode 1807: reward = 200.00, steps = 200\n",
      "14:32:33 [DEBUG] verify episode 1808: reward = 160.00, steps = 160\n",
      "14:32:43 [DEBUG] verify episode 1809: reward = 170.00, steps = 170\n",
      "14:32:54 [DEBUG] verify episode 1810: reward = 200.00, steps = 200\n",
      "14:33:06 [DEBUG] verify episode 1811: reward = 200.00, steps = 200\n",
      "14:33:17 [DEBUG] verify episode 1812: reward = 182.00, steps = 182\n",
      "14:33:27 [DEBUG] verify episode 1813: reward = 176.00, steps = 176\n",
      "14:33:37 [DEBUG] verify episode 1814: reward = 179.00, steps = 179\n",
      "14:33:45 [DEBUG] verify episode 1815: reward = 144.00, steps = 144\n",
      "14:33:57 [DEBUG] verify episode 1816: reward = 200.00, steps = 200\n",
      "14:34:05 [DEBUG] verify episode 1817: reward = 142.00, steps = 142\n",
      "14:34:14 [DEBUG] verify episode 1818: reward = 161.00, steps = 161\n",
      "14:34:24 [DEBUG] verify episode 1819: reward = 176.00, steps = 176\n",
      "14:34:35 [DEBUG] verify episode 1820: reward = 192.00, steps = 192\n",
      "14:34:46 [DEBUG] verify episode 1821: reward = 193.00, steps = 193\n",
      "14:34:55 [DEBUG] verify episode 1822: reward = 159.00, steps = 159\n",
      "14:35:07 [DEBUG] verify episode 1823: reward = 200.00, steps = 200\n",
      "14:35:16 [DEBUG] verify episode 1824: reward = 158.00, steps = 158\n",
      "14:35:23 [DEBUG] verify episode 1825: reward = 128.00, steps = 128\n",
      "14:35:31 [DEBUG] verify episode 1826: reward = 135.00, steps = 135\n",
      "14:35:38 [DEBUG] verify episode 1827: reward = 141.00, steps = 141\n",
      "14:35:50 [DEBUG] verify episode 1828: reward = 200.00, steps = 200\n",
      "14:36:01 [DEBUG] verify episode 1829: reward = 200.00, steps = 200\n",
      "14:36:12 [DEBUG] verify episode 1830: reward = 193.00, steps = 193\n",
      "14:36:24 [DEBUG] verify episode 1831: reward = 200.00, steps = 200\n",
      "14:36:32 [DEBUG] verify episode 1832: reward = 150.00, steps = 150\n",
      "14:36:40 [DEBUG] verify episode 1833: reward = 148.00, steps = 148\n",
      "14:36:49 [DEBUG] verify episode 1834: reward = 156.00, steps = 156\n",
      "14:36:57 [DEBUG] verify episode 1835: reward = 137.00, steps = 137\n",
      "14:37:08 [DEBUG] verify episode 1836: reward = 186.00, steps = 186\n",
      "14:37:17 [DEBUG] verify episode 1837: reward = 165.00, steps = 165\n",
      "14:37:25 [DEBUG] verify episode 1838: reward = 141.00, steps = 141\n",
      "14:37:32 [DEBUG] verify episode 1839: reward = 126.00, steps = 126\n",
      "14:37:41 [DEBUG] verify episode 1840: reward = 165.00, steps = 165\n",
      "14:37:52 [DEBUG] verify episode 1841: reward = 200.00, steps = 200\n",
      "14:38:04 [DEBUG] verify episode 1842: reward = 200.00, steps = 200\n",
      "14:38:13 [DEBUG] verify episode 1843: reward = 180.00, steps = 180\n",
      "14:38:25 [DEBUG] verify episode 1844: reward = 198.00, steps = 198\n",
      "14:38:36 [DEBUG] verify episode 1845: reward = 188.00, steps = 188\n",
      "14:38:43 [DEBUG] verify episode 1846: reward = 125.00, steps = 125\n",
      "14:38:50 [DEBUG] verify episode 1847: reward = 119.00, steps = 119\n",
      "14:38:58 [DEBUG] verify episode 1848: reward = 151.00, steps = 151\n",
      "14:39:06 [DEBUG] verify episode 1849: reward = 131.00, steps = 131\n",
      "14:39:15 [DEBUG] verify episode 1850: reward = 174.00, steps = 174\n",
      "14:39:25 [DEBUG] verify episode 1851: reward = 165.00, steps = 165\n",
      "14:39:33 [DEBUG] verify episode 1852: reward = 158.00, steps = 158\n",
      "14:39:45 [DEBUG] verify episode 1853: reward = 200.00, steps = 200\n",
      "14:39:53 [DEBUG] verify episode 1854: reward = 151.00, steps = 151\n",
      "14:40:01 [DEBUG] verify episode 1855: reward = 144.00, steps = 144\n",
      "14:40:12 [DEBUG] verify episode 1856: reward = 200.00, steps = 200\n",
      "14:40:21 [DEBUG] verify episode 1857: reward = 161.00, steps = 161\n",
      "14:40:29 [DEBUG] verify episode 1858: reward = 141.00, steps = 141\n",
      "14:40:40 [DEBUG] verify episode 1859: reward = 200.00, steps = 200\n",
      "14:40:49 [DEBUG] verify episode 1860: reward = 150.00, steps = 150\n",
      "14:40:59 [DEBUG] verify episode 1861: reward = 169.00, steps = 169\n",
      "14:41:08 [DEBUG] verify episode 1862: reward = 144.00, steps = 144\n",
      "14:41:19 [DEBUG] verify episode 1863: reward = 200.00, steps = 200\n",
      "14:41:28 [DEBUG] verify episode 1864: reward = 168.00, steps = 168\n",
      "14:41:39 [DEBUG] verify episode 1865: reward = 200.00, steps = 200\n",
      "14:41:49 [DEBUG] verify episode 1866: reward = 200.00, steps = 200\n",
      "14:41:57 [DEBUG] verify episode 1867: reward = 145.00, steps = 145\n",
      "14:42:07 [DEBUG] verify episode 1868: reward = 191.00, steps = 191\n",
      "14:42:17 [DEBUG] verify episode 1869: reward = 177.00, steps = 177\n",
      "14:42:25 [DEBUG] verify episode 1870: reward = 149.00, steps = 149\n",
      "14:42:36 [DEBUG] verify episode 1871: reward = 200.00, steps = 200\n",
      "14:42:45 [DEBUG] verify episode 1872: reward = 164.00, steps = 164\n",
      "14:42:55 [DEBUG] verify episode 1873: reward = 160.00, steps = 160\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14:43:04 [DEBUG] verify episode 1874: reward = 165.00, steps = 165\n",
      "14:43:15 [DEBUG] verify episode 1875: reward = 200.00, steps = 200\n",
      "14:43:26 [DEBUG] verify episode 1876: reward = 200.00, steps = 200\n",
      "14:43:38 [DEBUG] verify episode 1877: reward = 200.00, steps = 200\n",
      "14:43:49 [DEBUG] verify episode 1878: reward = 196.00, steps = 196\n",
      "14:44:01 [DEBUG] verify episode 1879: reward = 195.00, steps = 195\n",
      "14:44:13 [DEBUG] verify episode 1880: reward = 200.00, steps = 200\n",
      "14:44:23 [DEBUG] verify episode 1881: reward = 177.00, steps = 177\n",
      "14:44:31 [DEBUG] verify episode 1882: reward = 144.00, steps = 144\n",
      "14:44:39 [DEBUG] verify episode 1883: reward = 146.00, steps = 146\n",
      "14:44:50 [DEBUG] verify episode 1884: reward = 179.00, steps = 179\n",
      "14:45:02 [DEBUG] verify episode 1885: reward = 176.00, steps = 176\n",
      "14:45:13 [DEBUG] verify episode 1886: reward = 198.00, steps = 198\n",
      "14:45:24 [DEBUG] verify episode 1887: reward = 200.00, steps = 200\n",
      "14:45:34 [DEBUG] verify episode 1888: reward = 171.00, steps = 171\n",
      "14:45:43 [DEBUG] verify episode 1889: reward = 164.00, steps = 164\n",
      "14:45:54 [DEBUG] verify episode 1890: reward = 179.00, steps = 179\n",
      "14:46:03 [DEBUG] verify episode 1891: reward = 156.00, steps = 156\n",
      "14:46:12 [DEBUG] verify episode 1892: reward = 150.00, steps = 150\n",
      "14:46:24 [DEBUG] verify episode 1893: reward = 197.00, steps = 197\n",
      "14:46:31 [DEBUG] verify episode 1894: reward = 139.00, steps = 139\n",
      "14:46:43 [DEBUG] verify episode 1895: reward = 200.00, steps = 200\n",
      "14:46:54 [DEBUG] verify episode 1896: reward = 196.00, steps = 196\n",
      "14:47:04 [DEBUG] verify episode 1897: reward = 200.00, steps = 200\n",
      "14:47:15 [DEBUG] verify episode 1898: reward = 200.00, steps = 200\n",
      "14:47:22 [DEBUG] verify episode 1899: reward = 126.00, steps = 126\n",
      "14:47:29 [DEBUG] verify episode 1900: reward = 134.00, steps = 134\n",
      "14:47:40 [DEBUG] verify episode 1901: reward = 192.00, steps = 192\n",
      "14:47:50 [DEBUG] verify episode 1902: reward = 168.00, steps = 168\n",
      "14:47:59 [DEBUG] verify episode 1903: reward = 160.00, steps = 160\n",
      "14:48:10 [DEBUG] verify episode 1904: reward = 193.00, steps = 193\n",
      "14:48:18 [DEBUG] verify episode 1905: reward = 160.00, steps = 160\n",
      "14:48:28 [DEBUG] verify episode 1906: reward = 186.00, steps = 186\n",
      "14:48:35 [DEBUG] verify episode 1907: reward = 126.00, steps = 126\n",
      "14:48:44 [DEBUG] verify episode 1908: reward = 168.00, steps = 168\n",
      "14:48:56 [DEBUG] verify episode 1909: reward = 200.00, steps = 200\n",
      "14:49:07 [DEBUG] verify episode 1910: reward = 177.00, steps = 177\n",
      "14:49:16 [DEBUG] verify episode 1911: reward = 160.00, steps = 160\n",
      "14:49:27 [DEBUG] verify episode 1912: reward = 200.00, steps = 200\n",
      "14:49:34 [DEBUG] verify episode 1913: reward = 129.00, steps = 129\n",
      "14:49:43 [DEBUG] verify episode 1914: reward = 161.00, steps = 161\n",
      "14:49:54 [DEBUG] verify episode 1915: reward = 200.00, steps = 200\n",
      "14:50:03 [DEBUG] verify episode 1916: reward = 144.00, steps = 144\n",
      "14:50:13 [DEBUG] verify episode 1917: reward = 171.00, steps = 171\n",
      "14:50:24 [DEBUG] verify episode 1918: reward = 169.00, steps = 169\n",
      "14:50:33 [DEBUG] verify episode 1919: reward = 154.00, steps = 154\n",
      "14:50:42 [DEBUG] verify episode 1920: reward = 155.00, steps = 155\n",
      "14:50:54 [DEBUG] verify episode 1921: reward = 200.00, steps = 200\n",
      "14:51:03 [DEBUG] verify episode 1922: reward = 150.00, steps = 150\n",
      "14:51:14 [DEBUG] verify episode 1923: reward = 189.00, steps = 189\n",
      "14:51:28 [DEBUG] verify episode 1924: reward = 200.00, steps = 200\n",
      "14:51:40 [DEBUG] verify episode 1925: reward = 179.00, steps = 179\n",
      "14:51:52 [DEBUG] verify episode 1926: reward = 159.00, steps = 159\n",
      "14:52:05 [DEBUG] verify episode 1927: reward = 170.00, steps = 170\n",
      "14:52:20 [DEBUG] verify episode 1928: reward = 200.00, steps = 200\n",
      "14:52:34 [DEBUG] verify episode 1929: reward = 185.00, steps = 185\n",
      "14:52:46 [DEBUG] verify episode 1930: reward = 164.00, steps = 164\n",
      "14:52:57 [DEBUG] verify episode 1931: reward = 200.00, steps = 200\n",
      "14:53:08 [DEBUG] verify episode 1932: reward = 186.00, steps = 186\n",
      "14:53:17 [DEBUG] verify episode 1933: reward = 166.00, steps = 166\n",
      "14:53:28 [DEBUG] verify episode 1934: reward = 192.00, steps = 192\n",
      "14:53:39 [DEBUG] verify episode 1935: reward = 200.00, steps = 200\n",
      "14:53:47 [DEBUG] verify episode 1936: reward = 142.00, steps = 142\n",
      "14:53:57 [DEBUG] verify episode 1937: reward = 177.00, steps = 177\n",
      "14:54:08 [DEBUG] verify episode 1938: reward = 191.00, steps = 191\n",
      "14:54:19 [DEBUG] verify episode 1939: reward = 200.00, steps = 200\n",
      "14:54:30 [DEBUG] verify episode 1940: reward = 200.00, steps = 200\n",
      "14:54:41 [DEBUG] verify episode 1941: reward = 200.00, steps = 200\n",
      "14:54:50 [DEBUG] verify episode 1942: reward = 174.00, steps = 174\n",
      "14:55:00 [DEBUG] verify episode 1943: reward = 175.00, steps = 175\n",
      "14:55:11 [DEBUG] verify episode 1944: reward = 200.00, steps = 200\n",
      "14:55:19 [DEBUG] verify episode 1945: reward = 160.00, steps = 160\n",
      "14:55:29 [DEBUG] verify episode 1946: reward = 167.00, steps = 167\n",
      "14:55:36 [DEBUG] verify episode 1947: reward = 134.00, steps = 134\n",
      "14:55:46 [DEBUG] verify episode 1948: reward = 174.00, steps = 174\n",
      "14:55:56 [DEBUG] verify episode 1949: reward = 200.00, steps = 200\n",
      "14:56:07 [DEBUG] verify episode 1950: reward = 200.00, steps = 200\n",
      "14:56:16 [DEBUG] verify episode 1951: reward = 159.00, steps = 159\n",
      "14:56:24 [DEBUG] verify episode 1952: reward = 153.00, steps = 153\n",
      "14:56:33 [DEBUG] verify episode 1953: reward = 178.00, steps = 178\n",
      "14:56:43 [DEBUG] verify episode 1954: reward = 162.00, steps = 162\n",
      "14:56:53 [DEBUG] verify episode 1955: reward = 171.00, steps = 171\n",
      "14:57:04 [DEBUG] verify episode 1956: reward = 200.00, steps = 200\n",
      "14:57:16 [DEBUG] verify episode 1957: reward = 200.00, steps = 200\n",
      "14:57:25 [DEBUG] verify episode 1958: reward = 160.00, steps = 160\n",
      "14:57:36 [DEBUG] verify episode 1959: reward = 200.00, steps = 200\n",
      "14:57:44 [DEBUG] verify episode 1960: reward = 145.00, steps = 145\n",
      "14:57:53 [DEBUG] verify episode 1961: reward = 162.00, steps = 162\n",
      "14:58:03 [DEBUG] verify episode 1962: reward = 200.00, steps = 200\n",
      "14:58:12 [DEBUG] verify episode 1963: reward = 155.00, steps = 155\n",
      "14:58:20 [DEBUG] verify episode 1964: reward = 159.00, steps = 159\n",
      "14:58:30 [DEBUG] verify episode 1965: reward = 174.00, steps = 174\n",
      "14:58:42 [DEBUG] verify episode 1966: reward = 200.00, steps = 200\n",
      "14:58:53 [DEBUG] verify episode 1967: reward = 199.00, steps = 199\n",
      "14:59:03 [DEBUG] verify episode 1968: reward = 200.00, steps = 200\n",
      "14:59:14 [DEBUG] verify episode 1969: reward = 200.00, steps = 200\n",
      "14:59:22 [DEBUG] verify episode 1970: reward = 143.00, steps = 143\n",
      "14:59:32 [DEBUG] verify episode 1971: reward = 200.00, steps = 200\n",
      "14:59:43 [DEBUG] verify episode 1972: reward = 200.00, steps = 200\n",
      "14:59:54 [DEBUG] verify episode 1973: reward = 200.00, steps = 200\n",
      "15:00:05 [DEBUG] verify episode 1974: reward = 200.00, steps = 200\n",
      "15:00:16 [DEBUG] verify episode 1975: reward = 200.00, steps = 200\n",
      "15:00:26 [DEBUG] verify episode 1976: reward = 156.00, steps = 156\n",
      "15:00:39 [DEBUG] verify episode 1977: reward = 200.00, steps = 200\n",
      "15:00:47 [DEBUG] verify episode 1978: reward = 141.00, steps = 141\n",
      "15:00:57 [DEBUG] verify episode 1979: reward = 192.00, steps = 192\n",
      "15:01:09 [DEBUG] verify episode 1980: reward = 199.00, steps = 199\n",
      "15:01:17 [DEBUG] verify episode 1981: reward = 152.00, steps = 152\n",
      "15:01:26 [DEBUG] verify episode 1982: reward = 166.00, steps = 166\n",
      "15:01:37 [DEBUG] verify episode 1983: reward = 200.00, steps = 200\n",
      "15:01:48 [DEBUG] verify episode 1984: reward = 200.00, steps = 200\n",
      "15:01:58 [DEBUG] verify episode 1985: reward = 200.00, steps = 200\n",
      "15:02:09 [DEBUG] verify episode 1986: reward = 198.00, steps = 198\n",
      "15:02:21 [DEBUG] verify episode 1987: reward = 200.00, steps = 200\n",
      "15:02:31 [DEBUG] verify episode 1988: reward = 169.00, steps = 169\n",
      "15:02:45 [DEBUG] verify episode 1989: reward = 200.00, steps = 200\n",
      "15:02:59 [DEBUG] verify episode 1990: reward = 200.00, steps = 200\n",
      "15:03:11 [DEBUG] verify episode 1991: reward = 200.00, steps = 200\n",
      "15:03:22 [DEBUG] verify episode 1992: reward = 194.00, steps = 194\n",
      "15:03:22 [INFO] ==== test ====\n",
      "15:03:34 [DEBUG] test episode 0: reward = 200.00, steps = 200\n",
      "15:03:45 [DEBUG] test episode 1: reward = 200.00, steps = 200\n",
      "15:03:58 [DEBUG] test episode 2: reward = 196.00, steps = 196\n",
      "15:04:08 [DEBUG] test episode 3: reward = 155.00, steps = 155\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15:04:21 [DEBUG] test episode 4: reward = 172.00, steps = 172\n",
      "15:04:34 [DEBUG] test episode 5: reward = 189.00, steps = 189\n",
      "15:04:46 [DEBUG] test episode 6: reward = 200.00, steps = 200\n",
      "15:04:58 [DEBUG] test episode 7: reward = 200.00, steps = 200\n",
      "15:05:10 [DEBUG] test episode 8: reward = 200.00, steps = 200\n",
      "15:05:22 [DEBUG] test episode 9: reward = 200.00, steps = 200\n",
      "15:05:33 [DEBUG] test episode 10: reward = 200.00, steps = 200\n",
      "15:05:45 [DEBUG] test episode 11: reward = 200.00, steps = 200\n",
      "15:05:56 [DEBUG] test episode 12: reward = 200.00, steps = 200\n",
      "15:06:07 [DEBUG] test episode 13: reward = 200.00, steps = 200\n",
      "15:06:18 [DEBUG] test episode 14: reward = 186.00, steps = 186\n",
      "15:06:29 [DEBUG] test episode 15: reward = 200.00, steps = 200\n",
      "15:06:40 [DEBUG] test episode 16: reward = 200.00, steps = 200\n",
      "15:06:56 [DEBUG] test episode 17: reward = 200.00, steps = 200\n",
      "15:07:09 [DEBUG] test episode 18: reward = 186.00, steps = 186\n",
      "15:07:22 [DEBUG] test episode 19: reward = 174.00, steps = 174\n",
      "15:07:34 [DEBUG] test episode 20: reward = 175.00, steps = 175\n",
      "15:07:48 [DEBUG] test episode 21: reward = 195.00, steps = 195\n",
      "15:08:01 [DEBUG] test episode 22: reward = 200.00, steps = 200\n",
      "15:08:12 [DEBUG] test episode 23: reward = 200.00, steps = 200\n",
      "15:08:24 [DEBUG] test episode 24: reward = 176.00, steps = 176\n",
      "15:08:34 [DEBUG] test episode 25: reward = 200.00, steps = 200\n",
      "15:08:46 [DEBUG] test episode 26: reward = 200.00, steps = 200\n",
      "15:08:57 [DEBUG] test episode 27: reward = 200.00, steps = 200\n",
      "15:09:08 [DEBUG] test episode 28: reward = 200.00, steps = 200\n",
      "15:09:19 [DEBUG] test episode 29: reward = 187.00, steps = 187\n",
      "15:09:31 [DEBUG] test episode 30: reward = 198.00, steps = 198\n",
      "15:09:41 [DEBUG] test episode 31: reward = 200.00, steps = 200\n",
      "15:09:51 [DEBUG] test episode 32: reward = 200.00, steps = 200\n",
      "15:10:02 [DEBUG] test episode 33: reward = 200.00, steps = 200\n",
      "15:10:12 [DEBUG] test episode 34: reward = 196.00, steps = 196\n",
      "15:10:21 [DEBUG] test episode 35: reward = 176.00, steps = 176\n",
      "15:10:32 [DEBUG] test episode 36: reward = 200.00, steps = 200\n",
      "15:10:42 [DEBUG] test episode 37: reward = 200.00, steps = 200\n",
      "15:10:53 [DEBUG] test episode 38: reward = 200.00, steps = 200\n",
      "15:11:03 [DEBUG] test episode 39: reward = 172.00, steps = 172\n",
      "15:11:13 [DEBUG] test episode 40: reward = 200.00, steps = 200\n",
      "15:11:24 [DEBUG] test episode 41: reward = 200.00, steps = 200\n",
      "15:11:34 [DEBUG] test episode 42: reward = 200.00, steps = 200\n",
      "15:11:45 [DEBUG] test episode 43: reward = 200.00, steps = 200\n",
      "15:11:53 [DEBUG] test episode 44: reward = 172.00, steps = 172\n",
      "15:12:04 [DEBUG] test episode 45: reward = 200.00, steps = 200\n",
      "15:12:15 [DEBUG] test episode 46: reward = 200.00, steps = 200\n",
      "15:12:25 [DEBUG] test episode 47: reward = 200.00, steps = 200\n",
      "15:12:34 [DEBUG] test episode 48: reward = 177.00, steps = 177\n",
      "15:12:46 [DEBUG] test episode 49: reward = 200.00, steps = 200\n",
      "15:12:56 [DEBUG] test episode 50: reward = 200.00, steps = 200\n",
      "15:13:07 [DEBUG] test episode 51: reward = 200.00, steps = 200\n",
      "15:13:17 [DEBUG] test episode 52: reward = 200.00, steps = 200\n",
      "15:13:28 [DEBUG] test episode 53: reward = 200.00, steps = 200\n",
      "15:13:38 [DEBUG] test episode 54: reward = 200.00, steps = 200\n",
      "15:13:48 [DEBUG] test episode 55: reward = 200.00, steps = 200\n",
      "15:13:59 [DEBUG] test episode 56: reward = 200.00, steps = 200\n",
      "15:14:09 [DEBUG] test episode 57: reward = 200.00, steps = 200\n",
      "15:14:20 [DEBUG] test episode 58: reward = 200.00, steps = 200\n",
      "15:14:30 [DEBUG] test episode 59: reward = 200.00, steps = 200\n",
      "15:14:41 [DEBUG] test episode 60: reward = 200.00, steps = 200\n",
      "15:14:51 [DEBUG] test episode 61: reward = 200.00, steps = 200\n",
      "15:15:02 [DEBUG] test episode 62: reward = 200.00, steps = 200\n",
      "15:15:12 [DEBUG] test episode 63: reward = 200.00, steps = 200\n",
      "15:15:23 [DEBUG] test episode 64: reward = 200.00, steps = 200\n",
      "15:15:33 [DEBUG] test episode 65: reward = 200.00, steps = 200\n",
      "15:15:44 [DEBUG] test episode 66: reward = 200.00, steps = 200\n",
      "15:15:54 [DEBUG] test episode 67: reward = 200.00, steps = 200\n",
      "15:16:04 [DEBUG] test episode 68: reward = 200.00, steps = 200\n",
      "15:16:15 [DEBUG] test episode 69: reward = 200.00, steps = 200\n",
      "15:16:24 [DEBUG] test episode 70: reward = 181.00, steps = 181\n",
      "15:16:35 [DEBUG] test episode 71: reward = 200.00, steps = 200\n",
      "15:16:45 [DEBUG] test episode 72: reward = 200.00, steps = 200\n",
      "15:16:56 [DEBUG] test episode 73: reward = 200.00, steps = 200\n",
      "15:17:06 [DEBUG] test episode 74: reward = 200.00, steps = 200\n",
      "15:17:15 [DEBUG] test episode 75: reward = 173.00, steps = 173\n",
      "15:17:26 [DEBUG] test episode 76: reward = 200.00, steps = 200\n",
      "15:17:36 [DEBUG] test episode 77: reward = 200.00, steps = 200\n",
      "15:17:46 [DEBUG] test episode 78: reward = 200.00, steps = 200\n",
      "15:17:57 [DEBUG] test episode 79: reward = 200.00, steps = 200\n",
      "15:18:07 [DEBUG] test episode 80: reward = 200.00, steps = 200\n",
      "15:18:17 [DEBUG] test episode 81: reward = 186.00, steps = 186\n",
      "15:18:27 [DEBUG] test episode 82: reward = 200.00, steps = 200\n",
      "15:18:37 [DEBUG] test episode 83: reward = 187.00, steps = 187\n",
      "15:18:48 [DEBUG] test episode 84: reward = 200.00, steps = 200\n",
      "15:18:58 [DEBUG] test episode 85: reward = 200.00, steps = 200\n",
      "15:19:09 [DEBUG] test episode 86: reward = 200.00, steps = 200\n",
      "15:19:19 [DEBUG] test episode 87: reward = 200.00, steps = 200\n",
      "15:19:30 [DEBUG] test episode 88: reward = 200.00, steps = 200\n",
      "15:19:40 [DEBUG] test episode 89: reward = 200.00, steps = 200\n",
      "15:19:51 [DEBUG] test episode 90: reward = 200.00, steps = 200\n",
      "15:20:01 [DEBUG] test episode 91: reward = 200.00, steps = 200\n",
      "15:20:11 [DEBUG] test episode 92: reward = 197.00, steps = 197\n",
      "15:20:22 [DEBUG] test episode 93: reward = 200.00, steps = 200\n",
      "15:20:32 [DEBUG] test episode 94: reward = 200.00, steps = 200\n",
      "15:20:43 [DEBUG] test episode 95: reward = 200.00, steps = 200\n",
      "15:20:53 [DEBUG] test episode 96: reward = 200.00, steps = 200\n",
      "15:21:03 [DEBUG] test episode 97: reward = 189.00, steps = 189\n",
      "15:21:14 [DEBUG] test episode 98: reward = 200.00, steps = 200\n",
      "15:21:24 [DEBUG] test episode 99: reward = 200.00, steps = 200\n",
      "15:21:24 [INFO] average episode reward = 195.95 ± 8.97\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2dd5gUVfb3v2dyHsIEGBgYwoDkNBKUKKgEFXWVxV3DqivrrglddXF11XV/rKxZX9PqyppzWANmBTGgOCg5CMiAAwMMOQww6b5/dFVPdXdVdVVXVXd19fk8zzzTfavq1ukK3zp17r3nkhACDMMwjLdIirUBDMMwjP2wuDMMw3gQFneGYRgPwuLOMAzjQVjcGYZhPEhKrA0AgIKCAlFWVhZrMxiGYeKKJUuW7BJCFKotc4W4l5WVobKyMtZmMAzDxBVEtFlrGYdlGIZhPAiLO8MwjAdhcWcYhvEgLO4MwzAehMWdYRjGg4QVdyIqJaL5RLSGiFYR0TVSeRsi+oSI1kv/Wyu2uYmINhDROiI61ckfwDAMw4RixHNvBPBnIUQvAMMBXEFEvQHMAvCZEKIcwGfSd0jLpgPoA2AigEeJKNkJ4xmGYRh1wvZzF0LUAKiRPh8kojUAOgCYCmCstNozABYA+ItU/rIQ4hiATUS0AcBQAIvsNj7REULg9SXVOGNgCdJT1J+f7y7bhtE9CpGfmapZz8+1h7D9wFGc0K3AX/bNxl1ol5eBroU5aG4WeP2Hapw1qANSk8P7A1+t34XSNpno3DY7oPzDlduxuuYAOrbKxDlDOiIpifzLjjU24Za3VmJC72LkZqTg09U7MaFXEar3HcE5gzti2/4jmL92J84f3hnLq/cjOYlQkJOOP7+2FPdNG4jivAzsOHAUy6v3Y/xxRXj5+1+wff8RDChthcZmgfU7DuLyMd2QItm/ono/Xvhus395z+JctMpKRXZ6CtbWHMD4XsVhf+eqbfvx3KLN+L8z+6Jqdx12HTqGfXX1+GzNTpwxsASjykPHljQ1C1z98o8YU16IIWWt0a0wx7/s+6o9yM9MRY/iXP+6byypxq+GdER9YzPeX1GDswd3ABEF1Pnusm0YXV6I/Cz1c7xu+0EcONqAuvomPL5gIy4c0Rnt8jOQlpKEwpx0vLu8BuVFOejcNgu/++/3uGVKL9Q3NmNSv/b+OhZt3I3VNQdwap9idGydBQCo2X8Ejy3YiJz0FIw7rgi3vr0K14wvR9Xuw6irb0KXgiycNagjVlTvx74j9Vi8aQ/aZKdhX10DXl9SjTMHlWDGqG74Yn0tBpW2wrLqfcjNSMW3P+/GwaMN6Fmci6ZmgXb5Gag9VI+te49g1qTj8OHK7UhPScIr3/+C0weUYMueOqQkEZqEwJwP1mJK//b409hueHdZDQpz03HxCWUB19rWfUdw2kNfIj8zFXX1Tdh58BjyMlJwz7kD8MOWfdh/pB6DO7XG4k17kJeZiqFd2qAgJx1fb9iFd5dtQ05GCkpaZaK+sRkn9/ZdJ/PX7sQNp/bEja8vx/CubbG4ag8AoCQ/AzsPHsOgTq3w9YbdqD14DDsPHsXA0lZom52Oldv245U/jECHVplhrzezkJl87kRUBmAhgL4AtgghWimW7RVCtCaihwF8K4R4Xip/CsAHQojXg+qaAWAGAHTq1GnI5s2affEZDT5cuR2XP78El4/phlmTjgtZvmnXYYy7ZwFOOq4Ic393vGY9ZbPmAQCq5kxRLfvfj1sx85WluHZCD1wzoTysXWr1KcsB4M6z++G8oZ383+/+aC0emb9Rtb47z+6H91fU4Mv1u/DljeMw6q75AICJfdrhw1Xb/fsafdd8bNlThwenD8Q1Ly8Nqefmyb1w2eiuIbYoSUkiNDaLENv1fufNk3th9vtrQpar1fHsoirc+vYq1XWCj9tz327G3/63Eree1hubdh3Gc99uxvOXDsPI
8paHcM3+Ixhx5+cY0bUtXpoxXNdONboUZGPTrsOqy37428lok50WUEd2WjJW3TERADB09qfYefCYZt0AsPGfk9Htr+9rLi9tk4lf9hzRrUPJV38Zh5H/mm94fQD49Lox6F7U8hDVOx6x4OzBHXDftIERbUtES4QQFWrLDDeoElEOgDcAzBRCHNBbVaUs5AkihHhCCFEhhKgoLFQdPcuE4cDRBgDArkPqN9iR+iYAwLZ9xm8eNfbV1QMA9hzWv5HN1dkQ8L1WRyT2HK7Hyq37Afi8WRmlKNXVN2LLnjrVuv31SL9Dj8Zm85PX7D4cvl6/DWbWPeRbd29dPbYfOAoAOHSsMWCdJMmL31B7yHC9Sqp2qws7ADQ0NYeUHZauKQBhhd0IZoQdAG5/Z7XpfTS7fEIivWvfCobEnYhS4RP2F4QQb0rFO4iovbS8PYCdUnk1gFLF5h0BbLPHXCaAKF+zsbpFmpsFjjY0h9igjE7c9OYKQ3Wt2rbflMC6AaW3FBSRQbIUbqgLEv1I6g7GDk3Ue3hEwpY95utzubarPkTtwEhvGQLwFIA1Qoj7FIveAXCR9PkiAG8ryqcTUToRdQFQDmCxfSYzXoR0ZEYAONrYpLkcADYqPFetUCMBmPLQVzj9/30ViYmupq5B//ho4bTujb/3C1vri+TNyu00Njnzm4x47icCuADASUS0VPqbDGAOgJOJaD2Ak6XvEEKsAvAqgNUAPgRwhRAisiuP0UfP7Yr/3flpFsIW70v2ercaCFOt234QzQ4ISaS/Q2s7t3uldtMUwTkR0iPspx0H0eiQl2yFBoceWEZ6y3wF7ft6vMY2swHMtmAX40LsvASDwwvB35Uor/3qvXU2WqHNqQ8sxF8mHoc/ju0Wdl1h4MjsP9Jgi7BE8wFr5HdFm0i93A07D+GU+xcaOp/RpqHRmQeOK1L+MonH/iMNqKtvREOT0O2mCQSGWS54arGiXGN9WyyEvxHXDob84xM0NgtcMz58byN11H+VGwXYSSKJTx840oiDUueDHzbvtdsky0TyNmIEFvd4Jsr3tZ1e42MLNuK1ymrsOnQMVXOmhPHcYyRsNv7gSGLFar8vuI97ohGJEE77d8sQGzc+Cq89uYcj9XJuGQ9jtw5YuTF2Sl35lGh14QzZr8kdey4OrXciHfytbjyOTVaNcuFv6l6UHX6lCGBx9zBuujm3q4i7Uex6a9XrkaO+vvuRD0082GoHVnuWuDOM5czZY3H3APFwY1t50Gh1bTQbczf7JhPrEIjy9zn1oHaTA2AEMyPq1be3yRAbSXLoMmNx9wB2iZkWsRY57Zi7SzBhyIOfrbe0i+Az4aRYPbpgA8pmzUO9Q705YkGlCxtUnbq/WNw1+NVj3+CsR7+OtRmuQM9buublH3H87E/D1xF2De0LXCssY9WLC4cb34ii+Zx9/tstAFrSWCg59/FvomeIAtc80G3EqVPKvWU0WOLCJ7wWsRSht5c6n1nCbG4QvRGqZoh1xxQj8fSoxJBVDPi+Kjb3hxvDKlZx6jpjz50Jix2vjVa8bK1NN9bam7ckGDd67onOkQjTLLgZsw39RmFxZ8LidPgDCDdCNTbumtGHWjSsM9uozMQP7LkzrmBj7SGMv3cB9prMrGhFg4wI2MqtelmoI8MtnjuRIkSjYVQ0G70nPrAwavtKBFjcmYix6t0pheOxBRuxsfYwPlmzw1Yb9K5vs5579V71xGAfrzZnsxsJfoWXj0xTs/Dn97d9n0EnZ+32g47sJ1Hh3jJMwmJ2ENPT31SplpsWJYP3nJWw1ZqaA7ju1aUhw+rv/GANFqzbqbGVOh+viv+HVyLCvWWYEIz2lIh1rw8fVhpUYxRzj0Jg5o/PL0HV7jpcdVJgQrF/f/GzITuicWxmvbHc8X0kMhyWYSLG7Y1u2/cfxQvfbdFc7vZp0p78cpPj+4jlIXh/xfbY7TwBSOKwDBNrhIhcZPQaPP/8Wuhk1kpiNfkOEfD6kmp8L81kb5ad
B43n0wnngfuXBumAy597jAGcej9kcWeiwm3vrNJc1hAmGVSsBIwAXP/aMpz7+KKw66px/WvhwxlmG9NcEWFj7CVWYRkimktEO4lopaLsFcWUe1VEtFQqLyOiI4pljztjNgOEjwnb/bbnWOw+jHjHLOZu8fceMzHgRu8XuqPNhHEKp9p2jDSoPg3gYQDPygVCiF/Ln4noXgDKKWs2CiEG2mUgo024BlU7NFEIgVe+/yWkvmjqjdtj7gzjRozMobqQiMrUlpHvnXIagJPsNYtxCz9s2YvVNfYPEAogzJMiZjF3i48wIx73pl3GUijwCFXGLFZj7qMA7BBCKPOYdiGiH4noCyIapbUhEc0gokoiqqytrbVohjv5fO0OHDrWGLP92/E6f+hYFHJ5hBGo2KUfiMludYl1+mUmfrAq7ucBeEnxvQZAJyHEIADXAXiRiPLUNhRCPCGEqBBCVBQWFlo0w31s2nUYlzxdiRtfX+b4vsLd71YyBwYLayxmsomVcxpNHV34U2QOjjtnFmLcQMTiTkQpAM4G8IpcJoQ4JoTYLX1eAmAjAGdmf3U5hyWPfdOuOsf3JYQv5/bSX/Y5ULfz4hFuCr5YNahabVkwY/bf311tyAr5c31jMyrDdNHcV1eP+SZHuTLewYrnPgHAWiFEtVxARIVElCx97gqgHMDPGtt7mmi/PV/36lKc+cjX2K0y6bSV2HFz0CQ8TrTsb9mj/wAMtiFamE2OpsUaB9os5nywFuc8vki37tvfWYWL//u97ftm4gMjXSFfArAIQE8iqiaiS6VF0xEYkgGA0QCWE9EyAK8DuFwIEdkIEI8QrXS5y6t9HZbU8l1beXUPnm0+FmGAWMXcP1zVMjJz/Y7IkmXtOnQMkx780rItwYdg7fYDUv3aD6AlW+JnwhnGfoz0ljlPo/x3KmVvAHjDulnxTzTyklhFCIFf9qhnUJRpjlVXFaUNsTcBJ9+/EO9dNRJ9O+Sb2u6wzQ3q8huh/F/v0HBPmsSGR6gmMK8tqcbou+frrtPoAmWNXcw9kG379B+EwRDZ95DXfGNyybFhIiczLdmRelnc45jwOdL1heXHLeEbYINT0frr1mhUcEKI43UQkxD2tL0o65DPqfxfK3XDoWONrnjrYvR59Q8jkJPuTHJeTvkbJxxrbEJ6ijNPeD2Meu7NzQKNzQKpye4PR0VKQ5NAc7NAUlJ0f2N9Y0uLcn2Tr01FFvw73gvtZXO0oQl9b/soKrYx1miXl+FY3ey5O4SdvWU+XrUdPW/5ECu37g8oj0aPnCaDXVVueXsletzygaejBFe8+ANuenOF4fXtOhQPfb4BG3YeAgBc8nQllmzWbyg9Uu+9SaS9ipP3MIu7w9ghdnJf5WXVgWGUaAhpiOcuff141XaUzZrnL35RysfuYW0HALxS+Yvhde28b3ccaOniGq5/Ow9ijR+cfAtkcXcIO28wWcRj0QNHK+YezflI3fTAMBt2ckpoOQ2BN3Aywsfi7jBqvRwe+PSnAK/XKEbuZ7u9+RBxD5fqQGHAw5+v11kzPslMNdfu4YQIhzvF8dANl/GR7OBDmsU9BjzwqTnRa/HcI0NL8I1cV0pxNzKASbnGPR//FH4HBliwzj2J5VKSjd8yTr1xhH2AB53XvAzuN+FWOCyT4MhdAYPFWE2czTgCTsTs47XbohcQGtcJ416cmj8V4K6Qlnh2URU+WLEdL80YHrLMiVfj4DrVdFStzMr1oxRrAoV1R3ve8mHkO/MgTty6T3+zKaCBVckL323Gc4s2B5Tx49a9OBmWYXG3wK1va88LKmPLbEhWt7dQgXJbTi9rfpCWE/eulrADwM1vrQwt5NPmWpIcjJ1wWMYhnOgtY7cbaMRGHuRoDTc0bvIpjA3ZBtIKOBmWYXF3GDtvLK3LIFoC4gahYszD7SCxwUiSuWRuUI0/7DxlZsIhAXlIwnZbNLdvDsu4m8e+2KhaztruXthzT3Skm9PIhRAQI1e5qWsPHsPseas1
Byfp1ccAe+saDE9qDRHdB+LPtep28UM5Nhg56jyIKcHxh9xtuBBufmsFnvxyExaur42oPpYJ4IKnvjOUcfHnXYfw6vfVYddzGm43iREGjjuHZeIYO1LgRlqHmng3NDVbqpPxHcOF68MPrNp1qB73f2rPQC5L8KmOCUbemJxMI2Fkmr25RLSTiFYqym4noq1EtFT6m6xYdhMRbSCidUR0qlOGux0nzpkjPXDM2mBy/YamZvznS29NoysE0KiRR92NcFgmMTHiuT8NYKJK+f1CiIHS3/sAQES94ZtbtY+0zaPyhNlM5GjdmrG4Zc3u8/lvN+P/5q1xxJZY4uSxt/util/SYkOsj3tYcRdCLARgdJLrqQBeFkIcE0JsArABwFAL9sU9dpxf+SLZpNFgZjdfKkIOVoVm/5EGq+YwFmFtjw2xPu5WYu5XEtFyKWzTWirrAECZ8LpaKguBiGYQUSURVdbWuicxlH1EHkP5ccteVO+tAwB8snoHjjT4Jl946PMNlvdgJMZ3wVOLI6hZHZ7qzTx2e3zcvhIbYn3cIxX3xwB0AzAQQA2Ae6VyNeVQ/YVCiCeEEBVCiIrCwsIIzfAmZz36DUb+az6WbN6Ly56txCdRzJ0ejNXr06va7uSNa3fNXj0HbmbxX8fHp+cuhNghhGgSQjQDeBItoZdqAKWKVTsC2GbNxMRl274jttRj5iKza58yTR70Gnce1M7twjAAAIqDmLsaRNRe8fUsAHJPmncATCeidCLqAqAcgH3v+PGIhRMcbi5MK9eO1oV3wpzPTW+jh1fDMnvr6h2rO9av84x13JCqI2xWSCJ6CcBYAAVEVA3gNgBjiWggfPpSBeAPACCEWEVErwJYDaARwBVCiLiarXfDzkPoVpgdtf0dPNqAw8ea0C4/dBZ0u7o+WqkmWGbMCo/RkbDxRh1PQs2EIdZXflhxF0Kcp1L8lM76swHMtmJUrFi8aQ+m/XsR/jG1j211hjvBJ9+3ENsPHEXVnCkhy1JMzteppKGpGTsOHDVkg1Eiedh4VNsdfeX26CHzFCd2b4uvN+zWXE4U6gglUXTvBx6hqmDTrkMAgEU/a5804xg7i9slAVbDaFIhtdVmvbFCtdeLWX0+1tjioUYUlvFoiMHZfu4OVs7YwtzfHY8OrTI1l2emJoecxxW3n4pZk45z2LIWWNxVeH/Fdtvq0gtjhAtxGM07oaymUXINPlkd/jcYqf2R+YGZBuul9AVG8ay4e/R3MeG59bTeSE9JxkfXjkblLRNU18lOTwkZGZydnoJ+BtIA2wWLu0PYce+nRJBU6JT7vzC8biQmmn3wNXo1LuMgnC7A3bTKSgUA5KSnoCAnXXO9gaWtQspO7F6Aj68d7ZhtSljcFbihhVtJssE5uJRhmQYp50k4eYiWfLz43ZYo7Sm6OPlGwi8FseWKcd1w7pCOmsvb5YV2flDj1tP6YN7VI0PKexTnAgD6lORFZqBBeA5VhzByf4a7ifnV373wqfEu+ZmpSE9RT4n18ozhGN61raF60lKS0KdEPQzz3lUjUdomK2IbjcDiHgG3vr0SzwbNMK+FmzXAXe8p8YWbzytjDV/WT/W2JaPCHg4jU/BZhcMySgyo3dyvNhkSdjs8O7sERPkG4GD66ITCqw3FjI8GE21Fz1ziztyILO4mmfPhWlPr62lAuMvnhteWmdqXHt/+vBtls+ahlofO24Kj/dz5uRFTBLQ9dzX6Ohw7jxQWdwWGnFqDN54dPR4OHG20XAfgywQ596tNAIBl1fs11yubNc+W/SUCziYOY3WPNQ0mJmNxcjYlK7C4xzFG9UUIERKO4cZaa7Dn7m3ktBnFedpdHd0Oi7tDGLlB401gX/Bot8ZIcHSEqoN1M8ZobPaFZVKTw0ukO/12FvcA7Hi92nu4Hhc89Z0/tu3EK/Yve+rwu/8uRl298bCN2/rwxztONqhyY21sEaIlLJNmQNzD8d+Lj8fj5w+2XI9ZuCukScKJ9Uvfb8GX63cZ
zgsTCXd9tA4L1tXiaINOZkLWB0fhsIy3kRtUjSTvC3erj+tZZIdJpmHP3Qb088fobGdxv0a98cPHmvDhqsC0AXM+MNfrhwkknibI9hrdi3JwXLtcpKc4I18Cwp82w0hYxq3Er+UOYEQq1e47ZVk070ujIR+1zJM/7zqMX/baO+tSQuFoWMaxqj1BbkYKPpw5Gredbl9q7mAapbBMiqGYuztDnizuDsNOmDexJy20Ouy5R5fbTu8d8F2IlgbVNEVYpkRlQh03w+JuErXbzmhZyDoR3sPy5WZVA9zpb8QH31ftdaxur3vuo8oLLG1v97NvgEr2RrlB1UhYJvgNes7Z/ewxzCJhLSeiuUS0k4hWKsruJqK1RLSciN4iolZSeRkRHSGipdLf404aHwvUvCq1smgKZ6Rtty4de5HwbN59ONYmOMpvh3W2pR4nB3tdM6EcxXnpqOjcOuy6wWmtpw/t5JRZpjDiuT8NYGJQ2ScA+goh+gP4CcBNimUbhRADpb/L7TEzOqiJ3Serd5iqIxav1M3NwNZ9gfFzJ98cGGc55/FFsTbBU3QvyjG1/pR+7TG4U2t899cJyMtM9ZdrdZV26zzBYcVdCLEQwJ6gso+FEHIn628BaCc/dhGrtx3A6m0HTG1z2bOV+OKnWkW/9VCUZWYEM1LPQ77G5K3nraiJqB53XpIMo4/R6zYtJQlVc6agf0fjGRir5kxBWUG2//v4XsVht8lXPAB+Ndg9UmhHzP0SAB8ovnchoh+J6AsiGqW1ERHNIKJKIqqsra21wYzwTH7oS0x+6EvT2100dzGm/Vvbm4q1B3zoWGQ5aDgqw0SLR38bvUE8Y3sWAlBc3wbvzz+M7hpS1qUgG5//eYzudhmpyaiaMwVVc6bg3mkDTFjqLJbEnYhuBtAI4AWpqAZAJyHEIADXAXiRiFRTpgkhnhBCVAghKgoLC62YYRt6MehNuyKLgwaHS9wEx9wZs5zcO7wnGy20nKq7zwkUWCPaXjVnCm6a3Et1mdyo2mBy/uBYE7G4E9FFAE4D8FshBZqFEMeEELulz0sAbATQww5D3YyAQENTM37cslf1Qlq0Ub3bXKw9foYxS6T+gK1+hNCfStLs1MPh7kN5sFS8zQcckbgT0UQAfwFwhhCiTlFeSETJ0ueuAMoB/GyHoW5BaxDTnA/W4qxHv8G6HQdDlu865EwOdbPtB8HwwyXxmGAghqyH3tve+OO0h9knKRS3Z7tc1XVG9zD2Bh/uspVTf8jrWe3k4PfcGz3muRPRSwAWAehJRNVEdCmAhwHkAvgkqMvjaADLiWgZgNcBXC6E2KNasQuxMtJs5VZfnvTdkpAfqdfJ+2ITWrH2n3Yc5IEwjCqzz+praXu9nElPXFjh/7x+9qSAZcmK7boUZPsnh1b2eTd69+ld2mcP7hCa3tpgvVqkxqnnHjZxmBDiPJXipzTWfQPAG1aN8gKLq1qeaWYuCTOZHrXYecDYmwLH3BOPSM95EvkGV+ltn6zwzoMH/yQHxUoyUn0TUF91Ujm+XL8rItuaVcQ2PSXJ32XRrsF+cmZIedRqvMAjVG3ATid5wr1f2FcZw0gU5Ngz6USkb7dJGoFwZbHZ+2j/kQaV+siQ5/7ulSMN7ydVSkFwkk7YyY2wuNuAiLDHutrgh237Q5N8KWloajbUan84CmEhJg6J4EK9dkIPf0oE2eM2vdsg5bYSNpTvtk5tsgD4wjwyyUlkKN12YW7Lw64gJ013XSLCN7NOwoPTB0VibsxgcVcQ7TDFzFeWmt5m7N0L8P6K7brrGH3UxFkIkbHA8WW+YfTpEYhzu/wWIcxMMy4Zyr7tWpea2j13/Sk98Mex3TTr7d/Rlwtm6sASfHDNqICG3CRqebcw2qDauW227nIAKGmVGfGDLVawuNuAEIjIIzKb2gAw1m/+wBFjcfuFP0Vn8BgTe+6bNhDzrh4ZMJrSKMpQTEqScclobzKLoiz05cW5uj1T5CyORIRe7fMCnJSuhdkh
nnui+jA8E5MN9LntI79n5IZOKk51vfQCcsNgopGZlow+JcaH4QfTLi9DdV4APcweZvneIQDFeb4HQ1pKEuqDhD49JdCDlt9Uxx9XhAuGd8Yxaf1wI1Q/vW40ctLNP+ziBRZ3mwi+AGPJu8u2xdoE15KSHCoWTHjeuepEVO89Evm1JXS/BkBEuGRkF3QtzMa7y7bhf0v19yk/FE7sXqCa3EsrTNm9SL2/vVfgsIxNyH3Ov9sU2q3/6pd+xP2f/BQ1Wyo3O5drnPE2Wkm2inIzMLhT+PS3SpRvsVlpWvFq9Yau5CTC+F7FKMoLH9qRY+qyrsv/5QbXRIXF3SbCJe968LP1uOzZSs3ld7y72m6TGDUSMCRjhm6Foelxs9NbXvAj7Qo5tEsbPH/pMHxxw1gAxsOX104In71ErkqOtaenJOOJC4bghcuGae4ruN+9F/GcuL+3fBtG/uvzqOdYNrI/vQbUuV9vstMcJkJyMxInUqnsQigTLHlnDizBpL7tdOv5x5l98Zhq1kffPTGoUysQEUaWF+j2TJFzpysnvk4zMAl2c5DnDgCn9GmHolyf1x8s7vdNGxDQFdKreE7cZ72xAtV7j+BwBCM9tZLxG8GtCfsTlXvOVU+9mpeZOOIdjtP7tw8pC74Hzh/eOWAAktotUto6E5P6+ep66LxB+OTa0QCAZKlnTU66sWP+9zP64JYpvUxPw5cpdVFM13gQBMfcz3ZRznUn4StdgZUXNdb22JOaTP65L88Z0hHXv7YsYPmEXsW4ZUovjL1ngWYdGanJOHjUegqIeMVqtOKMASX+zwM65uPGiT0xraJUc33lwyInPQW/HxWYU92IOTMn9EB2eoqmaMue+w2n9oyoK2i84jnPPVY0u6EPJBPALVMC83P/56KKgFl21Hjg1wOdNMn12DmQj4jwp7HdVVMfqN0tRkIwamSnp2DmhB6ak1nL++pWmIPzh9szf2s84Flxj3ZzCWu7+wj2AtV4cHqgmBclQCxWDyuZUSPbX5jltj5s7KsrHvCcuMcq1S177tHj8fP1p2wz2hPikd8MxtSBHewwKa649bTe+I8iPa+ScANQ1Y6slbYqAJg5oRyv/mGEpTr0SNRb03PiLvPct5uxonq/qW2sXKMs7nSzpakAABqTSURBVNFjYt/2uOtX/TFr0nGqy5MNnsgpKg2K0cTJKesGdMzH2YPVH1yXjOyCCb2LVUMjM4O6Hjp1Vd9zTn9M7tcOfTvkY+aEHhjapY1DewLkX5Fgjrt3xf2uD9fh9Ie/MrTu/iMNeODTnyz1eDnawKMeo8m040tx+ZhuqhMvx8vr92kOPlx6tsvFfdP02w8uGN4ZA0tbBZQVhxk0JB/b3u1Vp0Y2THlxLh797RDNOHnL/giXjuyC+yxMPO1PaxAvF4ZNeE7cI5Hn2fNW44FP1+PTNTttt4dxlsn9WgRSfjanRNjl4+HfxFdKV5lHfqPygDPgpxblZeB/V5zo/37dyeEHDF04ogyd22bhvxcf7++y6LRk/u203pa6L153Sg90LczGsK5Ovh24DyPT7M0lop1EtFJR1oaIPiGi9dL/1oplNxHRBiJaR0SnOmW4Ub77OXRy6h1BCZDqpNzn8TZHIuNjbE/f3Jvym5fWxBDhOK1/iesHsI4qL8CgToHetlp4KRIn9erx5WHXKW2ThS9uGBfWw3cTfUry8fmfxyIvI3G6QQLGPPenAUwMKpsF4DMhRDmAz6TvIKLeAKYD6CNt86g8YXas+PUT34aUBYfH5de1yKbcYOxmjMGJkmUe/HWgxx0vQ8sjCRM8d+kwXDmuu6129CnJw5908qcz8UlYcRdCLAQQnA1rKoBnpM/PADhTUf6yEOKYEGITgA0Ahtpkq20o76mFP9Vi7+F6Q9tt2HnQIYsYJQ+ZnfFGOp8Zqb7L+cZTQxtazx3ivlGJcpposxiZacjMc2Pe1aNw40T1xmkmfok05l4shKgBAOm/PBVKBwC/KNarlspCIKIZRFRJ
RJW1tfZNGmGk04p84Tc0NePCuYvx1YZdhradcN9Ci9YxSpIIIV3yquZMQX5WZK/PqUlJqJozBb8Z1ilk2d0a6QgA4Lyhpf55Mu3gvKGh+w+mas4UtM/PjGwHUXwx0bsnuIOYu7G7QVXtslO9BIQQTwghKoQQFYWF5l7DrSI3NgV3X+RrNXoM7tQKP985xZa65HSyRgYtqXHn2f2xfvZkW2y5Ylw33Hl2P1vq0sKYtkfvCZBgnVDihkhzy+wgovZCiBoiag9A7mZSDUCZSKIjgKjOHGEkbi6vE+x5rNxqrl98JNTs154mb39d6GzuXiXSB+mEXqEz0Kcm+zx2NxCNHEPR7NLXua12TvQzBpbgqw270FUlTTATeyL13N8BcJH0+SIAbyvKpxNROhF1AVAOYLE1E+1nf10DqvfWhZTX7Dc3jVgkjLjzc81lUx8x1i8/kXlSY2SlFmYHxxSq5EExQ3MU1L1rmPw4drHxn5N1e8VMqyjFxn9ORodWEYaXTHKKg4O+vEhYz52IXgIwFkABEVUDuA3AHACvEtGlALYAOBcAhBCriOhVAKsBNAK4QgjR5JDtEXPy/b7Y+dp/BHcCii1Vu0MfOF4lUt/TrNf64u+HoclEcLh1dhrW3DERjy7YgP/3+Qaz5jma+lkW0dI2WVh9x6nofetHmuva4dwb6XUUzZ5Jj50/BA1N3F3ZKGHFXQhxnsai8RrrzwYw24pRTOLgdIQhJTnJdOwxMy3Z9MOndVYq9tY1oNFBcVdOVZeVpv+rjKZgiCeSkwjJSTHtWR1XeG6Eqhm4tT9xaZ8f2SCcN/90gmq5nJNFzXOXM03+74oTMSBouL8Wc1QaZcMN1VcSJ139GQfx3GQdLNjxRSwczMV/HY9MzQmb9VGb7KFj60ykSF0p1Tx3+Te2y8vAS5cNwz6NhvOFN4zD6LvnAwB6qeRuMdJ+UNG5NSo37414lC7jHRLbc+fOj66hT0keFt+sGumznaK8DOSGG4oe9NSR59wMlswPrhmFD64Z5c9n09Tsiwkvuukk1Wqz0lJQotEA2UnRM2VAaSu8f/Uo3DG1DwDfDEc3B00+olpHG18dXgzLMObwnuduZl3WdtdQmJvun9DYDSilsVVWKnLSU1B78FjIerKHLY8alT135QClSK+z3iV56F2Shz4l+RhY2spQ46XceCyv+82sk9iFSVA8J+5m6HObdm8DJnJy01Nw8Jj+PKRyr5doz/xjFGVMXqt740czR/s/F0ldBvW6BUbqTA/pHD5NQW5GCg4ebfTH/OXjq/WWoEcS8ZzAXsB74s4XZcwxImIdWweKjtveoqZVlCIlOQnXv7ZM0zblAJ8xPQrxnwsrMKZn6GjraPy0jq2zsKbmgF/cTbS9hvD1rJOw40DoWwoTXyREzL1s1ryYTb/nJe6Y2ge3nd477HpG+qL/8yypN4g7HXckJREm92sHAOjQOlP1+glO4DWhd7Fqjxb/ZBEG9jvv6pH44JpRpu2V8Yu7hZh7+/zMkEk8mPgjIcSdsYeuBTno1yHflrqy06WXRhc/c7PSUvD4+YPx7KUtiU2VD65w+vncpUMx//qxLQUq67975ciA731K8lV7yoRDrlrOl8S9ZRjvhWU0YMfdHozEYs04jekpPv8iN8Odl+LEvtpT4YVLvTuqPHxCvH4d7XlYymSk+rp45qS783gy0SNhrgDWdnswMhG4GZ9xRLe2uHlyL0yrKA2/ssuItW887+qRqNrlS1khP2cuPrELerXPw4UjymJnGOMKPCfuWn3XOeZuHQFh0HM3LntEhMtGR5aqN9YY/Zny24ndPYP6lOSjT0l+gC1pyUm4wuaZmpj4xHPirgVLuz0YmgzFeTOijlofc6MPsed/PwzvLduGgpw0u81iGE0816DKDrpzjOja1lhYxoPq/p+LjscfRndFmU5+cy26FGTjqvHlUcnDzqOuGRnPee5aWflY9K2TkpxkcHCL99S9S0E2bpocfvh/JDx+/hBs3ac9
iYsR3DoYjIkdnhN3LdijsQcjbRde9NydZGLfdrbVxU4MI+O5sIwWfNEb59bTtAcqWY25X39KD1wZ5w1+/75giOtmBfq/M/vi+LLW6NkuN9amMC4hYcSdsca1Ur5yrZj7uUM6+j//65z+mvVceVI5rj+1p73GRZlT+7TDEyan+3OaAaWt8NrlJ/j7uTNMxOJORD2JaKni7wARzSSi24loq6LcnmnlGcc5fUCJavlvhnXCNRPKAWgPYrr73AH+z+N6tkxiPbpHIf44tpt9RjIMY4iIY+5CiHUABgIAESUD2ArgLQAXA7hfCHGPLRbaRCKEZVKSyNI0b3I8XS9mbqS3jJJnL/EN3X9swcaI7WIYxjx2hWXGA9gohNhsU30RseyXfZrLEqFBNSCPiUlGlRf4P+vFzDmhFMPEB3aJ+3QALym+X0lEy4loLhGpJqMmohlEVElElbW1tbYYMfWRrzWXec1zV5vuLdJkUWcOLMFzlw7DOVLcfLBO/vDivAxUzZmiumxAR+2kV7npKZg6UD3swzCM/VgWdyJKA3AGgNekoscAdIMvZFMD4F617YQQTwghKoQQFYWF4RMsJRpf3DBWc1mb7DQsu+2UkPJIeyA+MH0QAGBszyJUzZnin6rNLG9fqZ2udsXfT8WD0n4YhnEeOzz3SQB+EELsAAAhxA4hRJMQohnAkwCG6m4dJeLNcU8i0vSQ5Th2MHb1Lw+ePNprbz0MkwjYIe7nQRGSISJljtSzAKy0YR+WibfEYZGEWIJT0K6fPSmifaenJGNDhNsyDOMOLIk7EWUBOBnAm4riu4hoBREtBzAOwLVW9mEXf/ufK54xhtHTdi0PPbhYbVagFIMPjRTFtlr7O6FbW9069OYTZRjGWSylHxBC1AFoG1R2gSWLHOJ/S7fF2gRTyF74whvGYfTd8wOWpSRpPJMN6HZ6ShIa65tM2aL20rPstlOQmZqMHrd8gKFd2oQsX/n3Uw0/SBiGsZ+EyS0Tb8jeco7KDEVWJj8e07MQ76/YHnkFEnJvne9vnqA6ixLPBMQwsYXTD7gUvSnckrU8dwPNCveeOzDg+12/0k4V8CcDI0sLc9N5yDvDuBAWd5cii7uaxGvNbN9koNFYnhVIpjg/Q3PdDq05Zs4w8QqLuw04MbmzHK5W03Etx91C5gFV4qyDEcMwCljcbaAgJz3gux1D9OWukGqTMMgNqp9eNyZA/NPCBONL22QiKYmw4Pqx/p4s3OTJMN6ExV0HoyM1lQJ7Wv/2qNAZvm8UOSyTnhp6imTPvXtRDkpbt9hYmJuuOcDp4d8MwkczRwMAygqy0aUgO8T2YHjSDYaJX1jcdTCaAVGpganJSbaIohyWCY6RA9oxd8CXYhcA2gfF0sf1LEJWmrnwEYdlGCZ+YXHXQU/chpa18c/Go+zZkppMtkyE7G9QValLWaa2q8fPH4I3/3SCbv3xkiXzinGcC55hIoE7I+ugl7LgvGGlyMtIxcerdwSJexKabGjZ1A2XhNlWbU5O7VGt2rW5ISzTLp977DBMJLDnroOeRCtFUSmCOekppie0CFf/WYM6BCxTPkyM6m+wiBsxUU5foBYaYhjG3fBdq0M4ASR/d8UW4bxmQjmabfbc5TzoPYtzcfPkXsjPCs3lbqY+AEiTBFsvQ8BZgzrgynHd437OU4ZJRDgso4NeXJqoxRtW6mNWWorqVHdDOrfGks17w+7zohGdUZSXEZD0a1R5IWaM7orLRnVFYW5gt8vkCPO33PWr/njqq00Y1lU7+VdqclLMhd0FkSGGiUvYc9chbOhCUp7UoLCFmuNuNOVwr/Z5uGJc94Cy5CTCXyf3ChF2AHjywgp0bpuFO6b20Tc1SCWL8jJw0+ReET8cGIZxN+y56xAuutLU5FshOCatFpYxGqkxm8e9a2EOvrhhnKltGIbxPuy566KvyA1NzQBCxT04LDO8axvceXY/Q3u0O03uyzOG49whHcOOXmUYxlvwHa9DuEhKvV/cA7Mi
XnlSYFhl7u+OR6/2eehWmB12n3aHSYZ3bYu7zx1gS997hmHiBw7L6BCuS6Oc4+VYY+DkF/LQ/mD00vh2apOFxqZmjC7nycIZhrGOJXEnoioABwE0AWgUQlQQURsArwAoA1AFYJoQInw3ERei28+dCCO7F6AoNx2Xj+mGL9fv0l5XannV88rv//VADLEhJw3DMAxgT1hmnBBioBCiQvo+C8BnQohyAJ9J313Pe1eNDCnTc9yHdWmD/KxULL55AirKjIly25w01fJlt57Cws4wjK04EXOfCuAZ6fMzAM50YB9RQav74ntXjURxXktiLr1wC9DSDVFtwuolt0yIaFBSosBNBQwTGVbFXQD4mIiWENEMqaxYCFEDANL/IrUNiWgGEVUSUWVtba1FM5yhR3Guanmw5qvpz/EGvXmGYRgnsCruJwohBgOYBOAKIhptdEMhxBNCiAohREVhofsaETNSk1QHDamh1hPltcv1szLKxEduRncwQmc0LcMwgVgSdyHENun/TgBvARgKYAcRtQcA6f9Oq0bGgmahHXMP1nKjvRc5P7o14iVNMcO4gYjFnYiyiShX/gzgFAArAbwD4CJptYsAvG3VSLtR88iDhVcI4e8K+dB5g3TXNduHfEyPljcVFnx99FISMwyjjRXPvRjAV0S0DMBiAPOEEB8CmAPgZCJaD+Bk6burUJOLYK+wqbmlJHgEalGesXCNf3/SDuVZkm49vbc/jW9WWrLWZgzDMBETcT93IcTPAAaolO8GMN6KUU5D5JtMWh5hCoR60L6wjK8wuDeMsqeMGS45sQynD2iPotwM3H1Of9wypRey03kcGcMw9pOw6Qcq/zYBP/7tZP93teiInCLGaExdq8HPnxqYCEW5vgdDSnIS2uaYewNgGIYxSkK6jQRCXkZg33K1Pu3NKp671qxEX944DgUs1o7Sv2OrWJvAMHFDYoq7iieu57krg/TfzDpJtc7SNlmm9seYo1+HfNzIM0IxjGESMiyjprXBnjygHnM3E0rp1T5Pc3+MOfqU5CGF0xYzjGE8e7dkpCZhXE/1wVFqXRe7F+Xgxd8PCygTJmPuwbw8YzhemTGcRckC/NbDMJHhWdXJzUg1nYzrhO4F/rBLYW66aszdDPmZqbpzlDLhaZXpe6MyOlqYYRgfno25CyH8nndF59Y4Z0hHzHpzRdjtSlpl4umLj0fPdrm48/21AIDMtGTMv34sdhw46qTJjAoT+7bDfdMG4LT+JbE2hWHiCk+I+9i75+PyMd0CypSdX4Z3bYvuRTn+7+XFOdBjbE9frrPZZ/XFyO4FGFTaCkSkOQkH4xxEhLMHd4y1GQwTd3hC3Kt214V45QItPWCIgIzUlpGgD04PTCegRW5GKqYdX2qTlQzDMNEj7mPuWjnXlWEZgq+BFQC6FWYjP5PzpzMM423iWtzXbT+IyQ99pbrM57m3CH/wJNYMwzBeJq7DMkcbmrCm5oDqMqFM2UuEjq0zcdVJ3f0JuxiGYbxMXIu7XhdFZbiG4GuY+/MpPMKRYZjEIK7FXa/7ubJBVY/nLh2KL9fvssskhmEYVxDX4q47uEjAH5fRW21UeSFGlbtvmj+GYRgrxHWDarJOXoCArpCc3YVhmAQjrsVdL+dLQMydtZ1hmATDyhyqpUQ0n4jWENEqIrpGKr+diLYS0VLpb7J95obYoLlMOcE1azvDMImGlZh7I4A/CyF+kCbKXkJEn0jL7hdC3GPdPH30PPdmIfz93NlzZxgm0bAyh2oNgBrp80EiWgMgqp3IdbtCInReVIZhmETBlpg7EZUBGATgO6noSiJaTkRziUg17y4RzSCiSiKqrK2tjWi/4XrLtIxhYtedYZjEwrK4E1EOgDcAzBRCHADwGIBuAAbC59nfq7adEOIJIUSFEKKisDCyroh6mn3xyDKM6eGrd0Q3zqnOMExiYamfOxGlwifsLwgh3gQAIcQOxfInAbxnyUIdknSC7jdN6gUAqJozxandMw4yoLQV
lv2yL9ZmMEzcErG4ky/W8RSANUKI+xTl7aV4PACcBWClNRO1iXT6O8b9vPnHE9DUzI0mDBMpVjz3EwFcAGAFES2Vyv4K4DwiGghfyLsKwB8sWahDMsfSPUtyEukOUmMYRh8rvWW+gnoX8vcjN8cc3FDKMAyjjmdHqDIMwyQycS7u6upenJceZUsYhmHcRVyLe7PGKCXd/u8MwzAJQFyLu1aDG4s7wzCJTlyLe6usNNVy1naGYRKduBZ3ACjJzwgpY8+dYZhEJ+7FXS3qzr1oGIZJdOJe3Ntkh4Zm9NISMAzDJAJxL+5zf3c8fl1RGlCWl5EaI2sYhmHcQdyLe3FeBq48qbv/+xXjuuHx84fE0CKGYZjYYykrpFvo2DoT153cA2cN6oDSNlmxNodhGCbmeELciQhXjy+PtRkMwzCuIe7DMgzDMEwoLO4MwzAehMWdYRjGg7C4MwzDeBAWd4ZhGA/C4s4wDONBWNwZhmE8CIs7wzCMByGhMZtRVI0gqgWw2UIVBQB22WSOnbBd5mC7zONW29guc0RqV2chRKHaAleIu1WIqFIIURFrO4Jhu8zBdpnHrbaxXeZwwi4OyzAMw3gQFneGYRgP4hVxfyLWBmjAdpmD7TKPW21ju8xhu12eiLkzDMMwgXjFc2cYhmEUsLgzDMN4kLgWdyKaSETriGgDEc2K8r5LiWg+Ea0holVEdI1UfjsRbSWipdLfZMU2N0m2riOiUx20rYqIVkj7r5TK2hDRJ0S0XvrfOpp2EVFPxTFZSkQHiGhmrI4XEc0lop1EtFJRZvoYEdEQ6VhvIKKHiMjS7Owadt1NRGuJaDkRvUVEraTyMiI6ojh2j0fZLtPnLkp2vaKwqYqIlkrl0TxeWvoQvWtMCBGXfwCSAWwE0BVAGoBlAHpHcf/tAQyWPucC+AlAbwC3A7heZf3eko3pALpItic7ZFsVgIKgsrsAzJI+zwLwr2jbFXTutgPoHKvjBWA0gMEAVlo5RgAWAxgBgAB8AGCSA3adAiBF+vwvhV1lyvWC6omGXabPXTTsClp+L4BbY3C8tPQhatdYPHvuQwFsEEL8LISoB/AygKnR2rkQokYI8YP0+SCANQA66GwyFcDLQohjQohNADbA9xuixVQAz0ifnwFwZgztGg9goxBCb1Syo3YJIRYC2KOyT8PHiIjaA8gTQiwSvrvwWcU2ttklhPhYCNEoff0WQEe9OqJllw4xPV4ykoc7DcBLenU4ZJeWPkTtGotnce8A4BfF92roi6tjEFEZgEEAvpOKrpReoecqXruiaa8A8DERLSGiGVJZsRCiBvBdeACKYmCXzHQE3nCxPl4yZo9RB+lzNG28BD7vTaYLEf1IRF8Q0SipLJp2mTl30T5eowDsEEKsV5RF/XgF6UPUrrF4Fne1uFPU+3USUQ6ANwDMFEIcAPAYgG4ABgKoge+1EIiuvScKIQYDmATgCiIarbNuVI8jEaUBOAPAa1KRG45XOLRsifaxuxlAI4AXpKIaAJ2EEIMAXAfgRSLKi6JdZs9dtM/peQh0IqJ+vFT0QXNVDRsiti2exb0aQKnie0cA26JpABGlwnfiXhBCvAkAQogdQogmIUQzgCfREkqImr1CiG3S/50A3pJs2CG94smvoTujbZfEJAA/CCF2SDbG/HgpMHuMqhEYInHMRiK6CMBpAH4rvZ5DeoXfLX1eAl+ctke07Irg3EXzeKUAOBvAKwp7o3q81PQBUbzG4lncvwdQTkRdJG9wOoB3orVzKZ73FIA1Qoj7FOXtFaudBUBuxX8HwHQiSieiLgDK4WsosduubCLKlT/D1xi3Utr/RdJqFwF4O5p2KQjwpmJ9vIIwdYyk1+qDRDRcuh4uVGxjG0Q0EcBfAJwhhKhTlBcSUbL0uatk189RtMvUuYuWXRITAKwVQvhDGtE8Xlr6gGheY1ZahGP9B2AyfK3QGwHcHOV9j4Tv9Wg5gKXS32QAzwFYIZW/A6C9YpubJVvXwWJrvI5d
XeFrdV8GYJV8XAC0BfAZgPXS/zbRtEvaTxaA3QDyFWUxOV7wPWBqADTA5x1dGskxAlABn6htBPAwpFHfNtu1Ab54rHydPS6t+yvpHC8D8AOA06Nsl+lzFw27pPKnAVwetG40j5eWPkTtGuP0AwzDMB4knsMyDMMwjAYs7gzDMB6ExZ1hGMaDsLgzDMN4EBZ3hmEYD8LizjAM40FY3BmGYTzI/wdiRxsCNG7avgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    \"\"\"Run a single episode of `agent` acting in `env`.\n",
    "\n",
    "    The environment and the agent are reset first (the agent receives\n",
    "    `mode`). The agent is then asked for an action on every observation,\n",
    "    including the terminal one, so it also sees the last reward together\n",
    "    with done=True before the loop ends. When `max_episode_steps` stops\n",
    "    the episode instead, the loop exits right after `env.step` and the\n",
    "    agent is not shown that final transition.\n",
    "\n",
    "    Args:\n",
    "        env: object providing reset(), step(action) returning a 4-tuple\n",
    "            (observation, reward, done, info), and render().\n",
    "        agent: object providing reset(mode=...),\n",
    "            step(observation, reward, done) returning an action, and close().\n",
    "        max_episode_steps: step cap; any falsy value (None, 0) means no cap.\n",
    "        mode: forwarded unchanged to agent.reset().\n",
    "        render: when true, env.render() is called once per agent step.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (sum of rewards returned by env.step, number of env.step calls).\n",
    "    \"\"\"\n",
    "    observation = env.reset()\n",
    "    reward, done = 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    total_reward = 0.\n",
    "    step_count = 0\n",
    "    out_of_steps = False\n",
    "    while not out_of_steps:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        total_reward += reward\n",
    "        step_count += 1\n",
    "        # A falsy cap (None or 0) never ends the episode early.\n",
    "        out_of_steps = bool(max_episode_steps) and (\n",
    "                step_count >= max_episode_steps)\n",
    "    agent.close()\n",
    "    return total_reward, step_count\n",
    "\n",
    "\n",
    "# ---- Train & verify ----\n",
    "# Each iteration plays one episode in 'train' mode, then one episode with\n",
    "# the default mode (mode=None) whose reward is recorded as the 'verify' score.\n",
    "logging.info('==== train & verify ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    # Training uses env.unwrapped and passes env._max_episode_steps\n",
    "    # explicitly, so the step cap is applied inside play_episode.\n",
    "    # NOTE(review): presumably this keeps a time-limit cut-off from reaching\n",
    "    # the agent as done=True - confirm against the agent's learning code.\n",
    "    play_episode(env.unwrapped, agent,\n",
    "            max_episode_steps=env._max_episode_steps, mode='train')\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('verify episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    # Stop once the mean over the most recent verify episodes (at most 10)\n",
    "    # exceeds 195; with fewer than 10 episodes the mean covers all of them.\n",
    "    if np.mean(episode_rewards[-10:]) > 195:\n",
    "        break\n",
    "# Plot the verify reward of every iteration, in order.\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "# ---- Test ----\n",
    "# Play 100 more episodes with the default mode (mode=None) and report the\n",
    "# mean and standard deviation of their rewards.\n",
    "logging.info('==== test ====')\n",
    "# Rebinds episode_rewards: the verify history collected above is replaced.\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "# np.std is called with its default arguments (population standard deviation).\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Close the environment now that training and testing are finished.\n",
    "env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
